2021-06-29 14:45:23 -07:00
|
|
|
// Copyright 2021 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package textparse
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"encoding/binary"
|
2021-07-13 11:01:44 -07:00
|
|
|
"fmt"
|
2021-06-29 14:45:23 -07:00
|
|
|
"io"
|
2021-07-13 11:01:44 -07:00
|
|
|
"math"
|
|
|
|
"strings"
|
2021-06-29 14:45:23 -07:00
|
|
|
"unicode/utf8"
|
|
|
|
|
|
|
|
"github.com/gogo/protobuf/proto"
|
|
|
|
"github.com/pkg/errors"
|
|
|
|
"github.com/prometheus/common/model"
|
2021-11-17 10:57:31 -08:00
|
|
|
|
|
|
|
"github.com/prometheus/prometheus/model/exemplar"
|
Style cleanup of all the changes in sparsehistogram so far
A lot of this code was hacked together, literally during a
hackathon. This commit intends not to change the code substantially,
but just make the code obey the usual style practices.
A (possibly incomplete) list of areas:
* Generally address linter warnings.
* The `pgk` directory is deprecated as per dev-summit. No new packages should
be added to it. I moved the new `pkg/histogram` package to `model`
anticipating what's proposed in #9478.
* Make the naming of the Sparse Histogram more consistent. Including
abbreviations, there were just too many names for it: SparseHistogram,
Histogram, Histo, hist, his, shs, h. The idea is to call it "Histogram" in
general. Only add "Sparse" if it is needed to avoid confusion with
conventional Histograms (which is rare because the TSDB really has no notion
of conventional Histograms). Use abbreviations only in local scope, and then
really abbreviate (not just removing three out of seven letters like in
"Histo"). This is in the spirit of
https://github.com/golang/go/wiki/CodeReviewComments#variable-names
* Several other minor name changes.
* A lot of formatting of doc comments. For one, following
https://github.com/golang/go/wiki/CodeReviewComments#comment-sentences
, but also layout question, anticipating how things will look like
when rendered by `godoc` (even where `godoc` doesn't render them
right now because they are for unexported types or not a doc comment
at all but just a normal code comment - consistency is queen!).
* Re-enabled `TestQueryLog` and `TestEndopints` (they pass now,
leaving them disabled was presumably an oversight).
* Bucket iterator for histogram.Histogram is now created with a
method.
* HistogramChunk.iterator now allows iterator recycling. (I think
@dieterbe only commented it out because he was confused by the
question in the comment.)
* HistogramAppender.Append panics now because we decided to treat
staleness marker differently.
Signed-off-by: beorn7 <beorn@grafana.com>
2021-10-09 06:57:07 -07:00
|
|
|
"github.com/prometheus/prometheus/model/histogram"
|
2021-11-17 10:57:31 -08:00
|
|
|
"github.com/prometheus/prometheus/model/labels"
|
2021-06-29 14:45:23 -07:00
|
|
|
|
|
|
|
dto "github.com/prometheus/prometheus/prompb/io/prometheus/client"
|
|
|
|
)
|
|
|
|
|
|
|
|
// ProtobufParser is a very inefficient way of unmarshaling the old Prometheus
// protobuf format and then present it as if it were parsed by a
// Prometheus-2-style text parser. This is only done so that we can easily plug
// in the protobuf format into Prometheus 2. For future use (with the final
// format that will be used for native histograms), we have to revisit the
// parsing. A lot of the efficiency tricks of the Prometheus-2-style parsing
// could be used in a similar fashion (byte-slice pointers into the raw
// payload), which requires some hand-coded protobuf handling. But the current
// parsers all expect the full series name (metric name plus label pairs) as one
// string, which is not how things are represented in the protobuf format. If
// the re-arrangement work is actually causing problems (which has to be seen),
// that expectation needs to be changed.
type ProtobufParser struct {
	in        []byte // The input to parse.
	inPos     int    // Position within the input.
	metricPos int    // Position within Metric slice.
	// fieldPos is the position within a Summary or (legacy) Histogram. -2
	// is the count. -1 is the sum. Otherwise it is the index within
	// quantiles/buckets.
	fieldPos    int
	fieldsDone  bool // true if no more fields of a Summary or (legacy) Histogram to be processed.
	redoClassic bool // true after parsing a native histogram if we need to parse it again as a classic histogram.

	// state is marked by the entry we are processing. EntryInvalid implies
	// that we have to decode the next MetricFamily.
	state Entry

	builder labels.ScratchBuilder // held here to reduce allocations when building Labels

	mf *dto.MetricFamily

	// Whether to also parse a classic histogram that is also present as a
	// native histogram.
	parseClassicHistograms bool

	// The following are just shenanigans to satisfy the Parser interface.
	metricBytes *bytes.Buffer // A somewhat fluid representation of the current metric.
}
|
|
|
|
|
Style cleanup of all the changes in sparsehistogram so far
A lot of this code was hacked together, literally during a
hackathon. This commit intends not to change the code substantially,
but just make the code obey the usual style practices.
A (possibly incomplete) list of areas:
* Generally address linter warnings.
* The `pgk` directory is deprecated as per dev-summit. No new packages should
be added to it. I moved the new `pkg/histogram` package to `model`
anticipating what's proposed in #9478.
* Make the naming of the Sparse Histogram more consistent. Including
abbreviations, there were just too many names for it: SparseHistogram,
Histogram, Histo, hist, his, shs, h. The idea is to call it "Histogram" in
general. Only add "Sparse" if it is needed to avoid confusion with
conventional Histograms (which is rare because the TSDB really has no notion
of conventional Histograms). Use abbreviations only in local scope, and then
really abbreviate (not just removing three out of seven letters like in
"Histo"). This is in the spirit of
https://github.com/golang/go/wiki/CodeReviewComments#variable-names
* Several other minor name changes.
* A lot of formatting of doc comments. For one, following
https://github.com/golang/go/wiki/CodeReviewComments#comment-sentences
, but also layout question, anticipating how things will look like
when rendered by `godoc` (even where `godoc` doesn't render them
right now because they are for unexported types or not a doc comment
at all but just a normal code comment - consistency is queen!).
* Re-enabled `TestQueryLog` and `TestEndopints` (they pass now,
leaving them disabled was presumably an oversight).
* Bucket iterator for histogram.Histogram is now created with a
method.
* HistogramChunk.iterator now allows iterator recycling. (I think
@dieterbe only commented it out because he was confused by the
question in the comment.)
* HistogramAppender.Append panics now because we decided to treat
staleness marker differently.
Signed-off-by: beorn7 <beorn@grafana.com>
2021-10-09 06:57:07 -07:00
|
|
|
// NewProtobufParser returns a parser for the payload in the byte slice.
|
2023-05-10 16:59:21 -07:00
|
|
|
func NewProtobufParser(b []byte, parseClassicHistograms bool) Parser {
|
2021-06-29 14:45:23 -07:00
|
|
|
return &ProtobufParser{
|
2023-05-10 16:59:21 -07:00
|
|
|
in: b,
|
|
|
|
state: EntryInvalid,
|
|
|
|
mf: &dto.MetricFamily{},
|
|
|
|
metricBytes: &bytes.Buffer{},
|
|
|
|
parseClassicHistograms: parseClassicHistograms,
|
2021-06-29 14:45:23 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Series returns the bytes of a series with a simple float64 as a
// value, the timestamp if set, and the value of the current sample.
func (p *ProtobufParser) Series() ([]byte, *int64, float64) {
	var (
		m  = p.mf.GetMetric()[p.metricPos]
		ts = m.GetTimestampMs()
		v  float64
	)
	switch p.mf.GetType() {
	case dto.MetricType_COUNTER:
		v = m.GetCounter().GetValue()
	case dto.MetricType_GAUGE:
		v = m.GetGauge().GetValue()
	case dto.MetricType_UNTYPED:
		v = m.GetUntyped().GetValue()
	case dto.MetricType_SUMMARY:
		s := m.GetSummary()
		// fieldPos selects which synthesized series of the summary we
		// are at: -2 is _count, -1 is _sum, >=0 indexes the quantiles.
		switch p.fieldPos {
		case -2:
			v = float64(s.GetSampleCount())
		case -1:
			v = s.GetSampleSum()
			// Need to detect summaries without quantile here.
			if len(s.GetQuantile()) == 0 {
				p.fieldsDone = true
			}
		default:
			v = s.GetQuantile()[p.fieldPos].GetValue()
		}
	case dto.MetricType_HISTOGRAM, dto.MetricType_GAUGE_HISTOGRAM:
		// This should only happen for a classic histogram.
		h := m.GetHistogram()
		switch p.fieldPos {
		case -2:
			// Prefer the float count; fall back to the integer
			// count if the float count is unset (zero).
			v = h.GetSampleCountFloat()
			if v == 0 {
				v = float64(h.GetSampleCount())
			}
		case -1:
			v = h.GetSampleSum()
		default:
			bb := h.GetBucket()
			if p.fieldPos >= len(bb) {
				// Past the last explicit bucket: this is the
				// implicit +Inf bucket, whose cumulative count
				// equals the total sample count.
				v = h.GetSampleCountFloat()
				if v == 0 {
					v = float64(h.GetSampleCount())
				}
			} else {
				// Same float-then-integer fallback for the
				// per-bucket cumulative count.
				v = bb[p.fieldPos].GetCumulativeCountFloat()
				if v == 0 {
					v = float64(bb[p.fieldPos].GetCumulativeCount())
				}
			}
		}
	default:
		panic("encountered unexpected metric type, this is a bug")
	}
	if ts != 0 {
		return p.metricBytes.Bytes(), &ts, v
	}
	// Nasty hack: Assume that ts==0 means no timestamp. That's not true in
	// general, but proto3 has no distinction between unset and
	// default. Need to avoid in the final format.
	return p.metricBytes.Bytes(), nil, v
}
|
|
|
|
|
// Histogram returns the bytes of a series with a native histogram as a value,
// the timestamp if set, and the native histogram in the current sample.
//
// The Compact method is called before returning the Histogram (or FloatHistogram).
//
// If the SampleCountFloat or the ZeroCountFloat in the proto message is > 0,
// the histogram is parsed and returned as a FloatHistogram and nil is returned
// as the (integer) Histogram return value. Otherwise, it is parsed and returned
// as an (integer) Histogram and nil is returned as the FloatHistogram return
// value.
func (p *ProtobufParser) Histogram() ([]byte, *int64, *histogram.Histogram, *histogram.FloatHistogram) {
	var (
		m  = p.mf.GetMetric()[p.metricPos]
		ts = m.GetTimestampMs()
		h  = m.GetHistogram()
	)
	// If classic buckets are also present, remember to re-emit this metric
	// as a classic histogram on a later Next() call.
	if p.parseClassicHistograms && len(h.GetBucket()) > 0 {
		p.redoClassic = true
	}
	if h.GetSampleCountFloat() > 0 || h.GetZeroCountFloat() > 0 {
		// It is a float histogram.
		fh := histogram.FloatHistogram{
			Count:           h.GetSampleCountFloat(),
			Sum:             h.GetSampleSum(),
			ZeroThreshold:   h.GetZeroThreshold(),
			ZeroCount:       h.GetZeroCountFloat(),
			Schema:          h.GetSchema(),
			PositiveSpans:   make([]histogram.Span, len(h.GetPositiveSpan())),
			PositiveBuckets: h.GetPositiveCount(),
			NegativeSpans:   make([]histogram.Span, len(h.GetNegativeSpan())),
			NegativeBuckets: h.GetNegativeCount(),
		}
		// Copy the span layout from the proto message.
		for i, span := range h.GetPositiveSpan() {
			fh.PositiveSpans[i].Offset = span.GetOffset()
			fh.PositiveSpans[i].Length = span.GetLength()
		}
		for i, span := range h.GetNegativeSpan() {
			fh.NegativeSpans[i].Offset = span.GetOffset()
			fh.NegativeSpans[i].Length = span.GetLength()
		}
		if p.mf.GetType() == dto.MetricType_GAUGE_HISTOGRAM {
			fh.CounterResetHint = histogram.GaugeType
		}
		// Normalize away empty spans/buckets the exposition may contain.
		fh.Compact(0)
		if ts != 0 {
			return p.metricBytes.Bytes(), &ts, nil, &fh
		}
		// Nasty hack: Assume that ts==0 means no timestamp. That's not true in
		// general, but proto3 has no distinction between unset and
		// default. Need to avoid in the final format.
		return p.metricBytes.Bytes(), nil, nil, &fh
	}

	// Integer histogram (note PositiveDelta/NegativeDelta: integer buckets
	// are delta-encoded, unlike the absolute float buckets above).
	sh := histogram.Histogram{
		Count:           h.GetSampleCount(),
		Sum:             h.GetSampleSum(),
		ZeroThreshold:   h.GetZeroThreshold(),
		ZeroCount:       h.GetZeroCount(),
		Schema:          h.GetSchema(),
		PositiveSpans:   make([]histogram.Span, len(h.GetPositiveSpan())),
		PositiveBuckets: h.GetPositiveDelta(),
		NegativeSpans:   make([]histogram.Span, len(h.GetNegativeSpan())),
		NegativeBuckets: h.GetNegativeDelta(),
	}
	for i, span := range h.GetPositiveSpan() {
		sh.PositiveSpans[i].Offset = span.GetOffset()
		sh.PositiveSpans[i].Length = span.GetLength()
	}
	for i, span := range h.GetNegativeSpan() {
		sh.NegativeSpans[i].Offset = span.GetOffset()
		sh.NegativeSpans[i].Length = span.GetLength()
	}
	if p.mf.GetType() == dto.MetricType_GAUGE_HISTOGRAM {
		sh.CounterResetHint = histogram.GaugeType
	}
	sh.Compact(0)
	if ts != 0 {
		return p.metricBytes.Bytes(), &ts, &sh, nil
	}
	// Same ts==0 hack as above.
	return p.metricBytes.Bytes(), nil, &sh, nil
}
|
|
|
|
|
|
|
|
// Help returns the metric name and help text in the current entry.
|
|
|
|
// Must only be called after Next returned a help entry.
|
|
|
|
// The returned byte slices become invalid after the next call to Next.
|
|
|
|
func (p *ProtobufParser) Help() ([]byte, []byte) {
|
|
|
|
return p.metricBytes.Bytes(), []byte(p.mf.GetHelp())
|
|
|
|
}
|
|
|
|
|
|
|
|
// Type returns the metric name and type in the current entry.
|
|
|
|
// Must only be called after Next returned a type entry.
|
|
|
|
// The returned byte slices become invalid after the next call to Next.
|
|
|
|
func (p *ProtobufParser) Type() ([]byte, MetricType) {
|
|
|
|
n := p.metricBytes.Bytes()
|
|
|
|
switch p.mf.GetType() {
|
|
|
|
case dto.MetricType_COUNTER:
|
|
|
|
return n, MetricTypeCounter
|
|
|
|
case dto.MetricType_GAUGE:
|
|
|
|
return n, MetricTypeGauge
|
|
|
|
case dto.MetricType_HISTOGRAM:
|
2021-07-09 12:00:18 -07:00
|
|
|
return n, MetricTypeHistogram
|
2023-01-05 06:39:10 -08:00
|
|
|
case dto.MetricType_GAUGE_HISTOGRAM:
|
|
|
|
return n, MetricTypeGaugeHistogram
|
2021-07-13 11:01:44 -07:00
|
|
|
case dto.MetricType_SUMMARY:
|
|
|
|
return n, MetricTypeSummary
|
2021-06-29 14:45:23 -07:00
|
|
|
}
|
|
|
|
return n, MetricTypeUnknown
|
|
|
|
}
|
|
|
|
|
|
|
|
// Unit always returns (nil, nil) because units aren't supported by the protobuf
// format.
func (p *ProtobufParser) Unit() ([]byte, []byte) {
	return nil, nil
}
|
|
|
|
|
|
|
|
// Comment always returns nil because comments aren't supported by the protobuf
// format.
func (p *ProtobufParser) Comment() []byte {
	return nil
}
|
|
|
|
|
|
|
|
// Metric writes the labels of the current sample into the passed labels.
|
|
|
|
// It returns the string from which the metric was parsed.
|
|
|
|
func (p *ProtobufParser) Metric(l *labels.Labels) string {
|
2022-03-09 14:13:50 -08:00
|
|
|
p.builder.Reset()
|
|
|
|
p.builder.Add(labels.MetricName, p.getMagicName())
|
2021-06-29 14:45:23 -07:00
|
|
|
|
|
|
|
for _, lp := range p.mf.GetMetric()[p.metricPos].GetLabel() {
|
2022-03-09 14:13:50 -08:00
|
|
|
p.builder.Add(lp.GetName(), lp.GetValue())
|
2021-06-29 14:45:23 -07:00
|
|
|
}
|
2021-07-13 11:01:44 -07:00
|
|
|
if needed, name, value := p.getMagicLabel(); needed {
|
2022-03-09 14:13:50 -08:00
|
|
|
p.builder.Add(name, value)
|
2021-07-13 11:01:44 -07:00
|
|
|
}
|
2021-06-29 14:45:23 -07:00
|
|
|
|
|
|
|
// Sort labels to maintain the sorted labels invariant.
|
2022-03-09 14:13:50 -08:00
|
|
|
p.builder.Sort()
|
|
|
|
*l = p.builder.Labels()
|
2021-06-29 14:45:23 -07:00
|
|
|
|
|
|
|
return p.metricBytes.String()
|
|
|
|
}
|
|
|
|
|
// Exemplar writes the exemplar of the current sample into the passed
// exemplar. It returns if an exemplar exists or not. In case of a native
// histogram, the legacy bucket section is still used for exemplars. To ingest
// all exemplars, call the Exemplar method repeatedly until it returns false.
func (p *ProtobufParser) Exemplar(ex *exemplar.Exemplar) bool {
	m := p.mf.GetMetric()[p.metricPos]
	var exProto *dto.Exemplar
	switch p.mf.GetType() {
	case dto.MetricType_COUNTER:
		exProto = m.GetCounter().GetExemplar()
	case dto.MetricType_HISTOGRAM, dto.MetricType_GAUGE_HISTOGRAM:
		bb := m.GetHistogram().GetBucket()
		if p.fieldPos < 0 {
			if p.state == EntrySeries {
				return false // At _count or _sum.
			}
			p.fieldPos = 0 // Start at 1st bucket for native histograms.
		}
		// For a classic series (EntrySeries), only the current bucket's
		// exemplar is considered. For a native histogram, fieldPos is
		// advanced across buckets so that repeated calls yield all
		// bucket exemplars until none are left.
		for p.fieldPos < len(bb) {
			exProto = bb[p.fieldPos].GetExemplar()
			if p.state == EntrySeries {
				break
			}
			p.fieldPos++
			if exProto != nil {
				break
			}
		}
	default:
		return false
	}
	if exProto == nil {
		return false
	}
	ex.Value = exProto.GetValue()
	if ts := exProto.GetTimestamp(); ts != nil {
		ex.HasTs = true
		// Convert seconds+nanos to integer milliseconds.
		ex.Ts = ts.GetSeconds()*1000 + int64(ts.GetNanos()/1_000_000)
	}
	p.builder.Reset()
	for _, lp := range exProto.GetLabel() {
		p.builder.Add(lp.GetName(), lp.GetValue())
	}
	p.builder.Sort()
	ex.Labels = p.builder.Labels()
	return true
}
|
|
|
|
|
|
|
|
// Next advances the parser to the next "sample" (emulating the behavior of a
// text format parser). It returns (EntryInvalid, io.EOF) if no samples were
// read.
func (p *ProtobufParser) Next() (Entry, error) {
	switch p.state {
	case EntryInvalid:
		p.metricPos = 0
		p.fieldPos = -2
		n, err := readDelimited(p.in[p.inPos:], p.mf)
		p.inPos += n
		if err != nil {
			return p.state, err
		}

		// Skip empty metric families.
		if len(p.mf.GetMetric()) == 0 {
			return p.Next()
		}

		// We are at the beginning of a metric family. Put only the name
		// into metricBytes and validate only name, help, and type for now.
		name := p.mf.GetName()
		if !model.IsValidMetricName(model.LabelValue(name)) {
			return EntryInvalid, errors.Errorf("invalid metric name: %s", name)
		}
		if help := p.mf.GetHelp(); !utf8.ValidString(help) {
			return EntryInvalid, errors.Errorf("invalid help for metric %q: %s", name, help)
		}
		switch p.mf.GetType() {
		case dto.MetricType_COUNTER,
			dto.MetricType_GAUGE,
			dto.MetricType_HISTOGRAM,
			dto.MetricType_GAUGE_HISTOGRAM,
			dto.MetricType_SUMMARY,
			dto.MetricType_UNTYPED:
			// All good.
		default:
			return EntryInvalid, errors.Errorf("unknown metric type for metric %q: %s", name, p.mf.GetType())
		}
		p.metricBytes.Reset()
		p.metricBytes.WriteString(name)

		p.state = EntryHelp
	case EntryHelp:
		p.state = EntryType
	case EntryType:
		// Decide whether this family is emitted as native histograms or
		// as regular (classic) series, based on the first metric.
		t := p.mf.GetType()
		if (t == dto.MetricType_HISTOGRAM || t == dto.MetricType_GAUGE_HISTOGRAM) &&
			isNativeHistogram(p.mf.GetMetric()[0].GetHistogram()) {
			p.state = EntryHistogram
		} else {
			p.state = EntrySeries
		}
		if err := p.updateMetricBytes(); err != nil {
			return EntryInvalid, err
		}
	case EntryHistogram, EntrySeries:
		if p.redoClassic {
			p.redoClassic = false
			p.state = EntrySeries
			// -3 so that the increment below lands on -2, i.e. the
			// _count field of the classic histogram.
			p.fieldPos = -3
			p.fieldsDone = false
		}
		t := p.mf.GetType()
		// For classic summaries/histograms, step through the fields
		// (_count, _sum, quantiles/buckets) before moving on to the
		// next metric.
		if p.state == EntrySeries && !p.fieldsDone &&
			(t == dto.MetricType_SUMMARY ||
				t == dto.MetricType_HISTOGRAM ||
				t == dto.MetricType_GAUGE_HISTOGRAM) {
			p.fieldPos++
		} else {
			p.metricPos++
			p.fieldPos = -2
			p.fieldsDone = false
			// If this is a metric family containing native
			// histograms, we have to switch back to native
			// histograms after parsing a classic histogram.
			if p.state == EntrySeries &&
				(t == dto.MetricType_HISTOGRAM || t == dto.MetricType_GAUGE_HISTOGRAM) &&
				isNativeHistogram(p.mf.GetMetric()[0].GetHistogram()) {
				p.state = EntryHistogram
			}
		}
		// Family exhausted: decode the next MetricFamily.
		if p.metricPos >= len(p.mf.GetMetric()) {
			p.state = EntryInvalid
			return p.Next()
		}
		if err := p.updateMetricBytes(); err != nil {
			return EntryInvalid, err
		}
	default:
		return EntryInvalid, errors.Errorf("invalid protobuf parsing state: %d", p.state)
	}
	return p.state, nil
}
|
|
|
|
|
|
|
|
// updateMetricBytes rebuilds p.metricBytes as the full series identifier: the
// (possibly magic-suffixed) metric name followed by separator-delimited label
// name/value pairs, validating label names and the UTF-8 encoding of values.
func (p *ProtobufParser) updateMetricBytes() error {
	b := p.metricBytes
	b.Reset()
	b.WriteString(p.getMagicName())
	for _, lp := range p.mf.GetMetric()[p.metricPos].GetLabel() {
		b.WriteByte(model.SeparatorByte)
		n := lp.GetName()
		if !model.LabelName(n).IsValid() {
			return errors.Errorf("invalid label name: %s", n)
		}
		b.WriteString(n)
		b.WriteByte(model.SeparatorByte)
		v := lp.GetValue()
		if !utf8.ValidString(v) {
			return errors.Errorf("invalid label value: %s", v)
		}
		b.WriteString(v)
	}
	// Append the synthesized "quantile"/"le" label, if one is needed.
	if needed, n, v := p.getMagicLabel(); needed {
		b.WriteByte(model.SeparatorByte)
		b.WriteString(n)
		b.WriteByte(model.SeparatorByte)
		b.WriteString(v)
	}
	return nil
}
|
|
|
|
|
// getMagicName usually just returns p.mf.GetName() but adds a magic suffix
// ("_count", "_sum", "_bucket") if needed according to the current parser
// state.
func (p *ProtobufParser) getMagicName() string {
	t := p.mf.GetType()
	// Native histograms (EntryHistogram) and simple types never get a
	// suffix.
	if p.state == EntryHistogram || (t != dto.MetricType_HISTOGRAM && t != dto.MetricType_GAUGE_HISTOGRAM && t != dto.MetricType_SUMMARY) {
		return p.mf.GetName()
	}
	if p.fieldPos == -2 {
		return p.mf.GetName() + "_count"
	}
	if p.fieldPos == -1 {
		return p.mf.GetName() + "_sum"
	}
	if t == dto.MetricType_HISTOGRAM || t == dto.MetricType_GAUGE_HISTOGRAM {
		return p.mf.GetName() + "_bucket"
	}
	// Summary quantile series use the bare name (plus a "quantile" label).
	return p.mf.GetName()
}
|
|
|
|
|
|
|
|
// getMagicLabel returns if a magic label ("quantile" or "le") is needed and, if
|
|
|
|
// so, its name and value. It also sets p.fieldsDone if applicable.
|
|
|
|
func (p *ProtobufParser) getMagicLabel() (bool, string, string) {
|
|
|
|
if p.state == EntryHistogram || p.fieldPos < 0 {
|
|
|
|
return false, "", ""
|
|
|
|
}
|
|
|
|
switch p.mf.GetType() {
|
|
|
|
case dto.MetricType_SUMMARY:
|
|
|
|
qq := p.mf.GetMetric()[p.metricPos].GetSummary().GetQuantile()
|
|
|
|
q := qq[p.fieldPos]
|
|
|
|
p.fieldsDone = p.fieldPos == len(qq)-1
|
|
|
|
return true, model.QuantileLabel, formatOpenMetricsFloat(q.GetQuantile())
|
2023-01-05 06:39:10 -08:00
|
|
|
case dto.MetricType_HISTOGRAM, dto.MetricType_GAUGE_HISTOGRAM:
|
2021-07-13 11:01:44 -07:00
|
|
|
bb := p.mf.GetMetric()[p.metricPos].GetHistogram().GetBucket()
|
|
|
|
if p.fieldPos >= len(bb) {
|
|
|
|
p.fieldsDone = true
|
|
|
|
return true, model.BucketLabel, "+Inf"
|
|
|
|
}
|
|
|
|
b := bb[p.fieldPos]
|
|
|
|
p.fieldsDone = math.IsInf(b.GetUpperBound(), +1)
|
|
|
|
return true, model.BucketLabel, formatOpenMetricsFloat(b.GetUpperBound())
|
|
|
|
}
|
|
|
|
return false, "", ""
|
|
|
|
}
|
|
|
|
|
2021-06-29 14:45:23 -07:00
|
|
|
// errInvalidVarint is returned by readDelimited when the leading length prefix
// cannot be decoded as a varint of at most binary.MaxVarintLen32 bytes.
var errInvalidVarint = errors.New("protobufparse: invalid varint encountered")
|
|
|
|
|
|
|
|
// readDelimited is essentially doing what the function of the same name in
|
|
|
|
// github.com/matttproud/golang_protobuf_extensions/pbutil is doing, but it is
|
|
|
|
// specific to a MetricFamily, utilizes the more efficient gogo-protobuf
|
|
|
|
// unmarshaling, and acts on a byte slice directly without any additional
|
|
|
|
// staging buffers.
|
|
|
|
func readDelimited(b []byte, mf *dto.MetricFamily) (n int, err error) {
|
|
|
|
if len(b) == 0 {
|
|
|
|
return 0, io.EOF
|
|
|
|
}
|
|
|
|
messageLength, varIntLength := proto.DecodeVarint(b)
|
|
|
|
if varIntLength == 0 || varIntLength > binary.MaxVarintLen32 {
|
|
|
|
return 0, errInvalidVarint
|
|
|
|
}
|
|
|
|
totalLength := varIntLength + int(messageLength)
|
|
|
|
if totalLength > len(b) {
|
|
|
|
return 0, errors.Errorf("protobufparse: insufficient length of buffer, expected at least %d bytes, got %d bytes", totalLength, len(b))
|
|
|
|
}
|
|
|
|
mf.Reset()
|
|
|
|
return totalLength, mf.Unmarshal(b[varIntLength:totalLength])
|
|
|
|
}
|
2021-07-13 11:01:44 -07:00
|
|
|
|
|
|
|
// formatOpenMetricsFloat works like the usual Go string formatting of a fleat
|
|
|
|
// but appends ".0" if the resulting number would otherwise contain neither a
|
|
|
|
// "." nor an "e".
|
|
|
|
func formatOpenMetricsFloat(f float64) string {
|
|
|
|
// A few common cases hardcoded.
|
|
|
|
switch {
|
|
|
|
case f == 1:
|
|
|
|
return "1.0"
|
|
|
|
case f == 0:
|
|
|
|
return "0.0"
|
|
|
|
case f == -1:
|
|
|
|
return "-1.0"
|
|
|
|
case math.IsNaN(f):
|
|
|
|
return "NaN"
|
|
|
|
case math.IsInf(f, +1):
|
|
|
|
return "+Inf"
|
|
|
|
case math.IsInf(f, -1):
|
|
|
|
return "-Inf"
|
|
|
|
}
|
|
|
|
s := fmt.Sprint(f)
|
|
|
|
if strings.ContainsAny(s, "e.") {
|
|
|
|
return s
|
|
|
|
}
|
|
|
|
return s + ".0"
|
|
|
|
}
|
2021-07-19 10:58:04 -07:00
|
|
|
|
2022-07-19 09:11:33 -07:00
|
|
|
// isNativeHistogram returns false iff the provided histograms has no sparse
|
|
|
|
// buckets and a zero threshold of 0 and a zero count of 0. In principle, this
|
|
|
|
// could still be meant to be a native histogram (with a zero threshold of 0 and
|
|
|
|
// no observations yet), but for now, we'll treat this case as a conventional
|
2021-07-19 10:58:04 -07:00
|
|
|
// histogram.
|
|
|
|
//
|
|
|
|
// TODO(beorn7): In the final format, there should be an unambiguous way of
|
2022-07-19 09:11:33 -07:00
|
|
|
// deciding if a histogram should be ingested as a conventional one or a native
|
2021-07-19 10:58:04 -07:00
|
|
|
// one.
|
2022-07-19 09:11:33 -07:00
|
|
|
func isNativeHistogram(h *dto.Histogram) bool {
|
2023-07-18 15:59:41 -07:00
|
|
|
return h.GetZeroThreshold() > 0 ||
|
2022-07-19 09:11:33 -07:00
|
|
|
h.GetZeroCount() > 0 ||
|
2023-07-18 15:59:41 -07:00
|
|
|
len(h.GetNegativeDelta()) > 0 ||
|
|
|
|
len(h.GetPositiveDelta()) > 0 ||
|
|
|
|
len(h.GetNegativeCount()) > 0 ||
|
|
|
|
len(h.GetPositiveCount()) > 0
|
2021-07-19 10:58:04 -07:00
|
|
|
}
|