mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-12 22:37:27 -08:00
Merge remote-tracking branch 'upstream/main' into sync-upstream-20231026
This commit is contained in:
commit
6341ba7374
|
@ -18,7 +18,6 @@ build:
|
|||
windows:
|
||||
- builtinassets
|
||||
- stringlabels
|
||||
flags: -a
|
||||
ldflags: |
|
||||
-X github.com/prometheus/common/version.Version={{.Version}}
|
||||
-X github.com/prometheus/common/version.Revision={{.Revision}}
|
||||
|
|
|
@ -202,9 +202,10 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
|
|||
level.Info(logger).Log("msg", "No default port will be appended to scrape targets' addresses.")
|
||||
case "native-histograms":
|
||||
c.tsdb.EnableNativeHistograms = true
|
||||
// Change global variable. Hacky, but it's hard to pass new option or default to unmarshaller.
|
||||
// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers.
|
||||
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultNativeHistogramScrapeProtocols
|
||||
level.Info(logger).Log("msg", "Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultConfig.GlobalConfig.ScrapeProtocols))
|
||||
config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultNativeHistogramScrapeProtocols
|
||||
level.Info(logger).Log("msg", "Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
|
||||
case "":
|
||||
continue
|
||||
case "promql-at-modifier", "promql-negative-offset":
|
||||
|
@ -620,8 +621,18 @@ func main() {
|
|||
discoveryManagerNotify = legacymanager.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), legacymanager.Name("notify"))
|
||||
}
|
||||
|
||||
scrapeManager, err := scrape.NewManager(
|
||||
&cfg.scrape,
|
||||
log.With(logger, "component", "scrape manager"),
|
||||
fanoutStorage,
|
||||
prometheus.DefaultRegisterer,
|
||||
)
|
||||
if err != nil {
|
||||
level.Error(logger).Log("msg", "failed to create a scrape manager", "err", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
var (
|
||||
scrapeManager = scrape.NewManager(&cfg.scrape, log.With(logger, "component", "scrape manager"), fanoutStorage)
|
||||
tracingManager = tracing.NewManager(logger)
|
||||
|
||||
queryEngine *promql.Engine
|
||||
|
|
15
cmd/promtool/testdata/no-test-group-interval.yml
vendored
Normal file
15
cmd/promtool/testdata/no-test-group-interval.yml
vendored
Normal file
|
@ -0,0 +1,15 @@
|
|||
tests:
|
||||
- input_series:
|
||||
- series: test
|
||||
values: 0 1
|
||||
promql_expr_test:
|
||||
- expr: test
|
||||
eval_time: 59s
|
||||
exp_samples:
|
||||
- value: 0
|
||||
labels: test
|
||||
- expr: test
|
||||
eval_time: 1m
|
||||
exp_samples:
|
||||
- value: 1
|
||||
labels: test
|
|
@ -96,6 +96,9 @@ func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts) []error {
|
|||
// Testing.
|
||||
var errs []error
|
||||
for _, t := range unitTestInp.Tests {
|
||||
if t.Interval == 0 {
|
||||
t.Interval = unitTestInp.EvaluationInterval
|
||||
}
|
||||
ers := t.test(evalInterval, groupOrderMap, queryOpts, unitTestInp.RuleFiles...)
|
||||
if ers != nil {
|
||||
errs = append(errs, ers...)
|
||||
|
|
|
@ -112,6 +112,16 @@ func TestRulesUnitTest(t *testing.T) {
|
|||
},
|
||||
want: 0,
|
||||
},
|
||||
{
|
||||
name: "No test group interval",
|
||||
args: args{
|
||||
files: []string{"./testdata/no-test-group-interval.yml"},
|
||||
},
|
||||
queryOpts: promql.LazyLoaderOpts{
|
||||
EnableNegativeOffset: true,
|
||||
},
|
||||
want: 0,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
|
|
|
@ -39,7 +39,7 @@ tests:
|
|||
|
||||
``` yaml
|
||||
# Series data
|
||||
interval: <duration>
|
||||
[ interval: <duration> | default = evaluation_interval ]
|
||||
input_series:
|
||||
[ - <series> ]
|
||||
|
||||
|
|
4
go.mod
4
go.mod
|
@ -49,7 +49,7 @@ require (
|
|||
github.com/prometheus/alertmanager v0.26.0
|
||||
github.com/prometheus/client_golang v1.17.0
|
||||
github.com/prometheus/client_model v0.5.0
|
||||
github.com/prometheus/common v0.44.0
|
||||
github.com/prometheus/common v0.45.0
|
||||
github.com/prometheus/common/assets v0.2.0
|
||||
github.com/prometheus/common/sigv4 v0.1.0
|
||||
github.com/prometheus/exporter-toolkit v0.10.0
|
||||
|
@ -165,7 +165,7 @@ require (
|
|||
github.com/mailru/easyjson v0.7.7 // indirect
|
||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||
github.com/mattn/go-isatty v0.0.19 // indirect
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
|
||||
github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect
|
||||
github.com/mitchellh/go-homedir v1.1.0 // indirect
|
||||
github.com/mitchellh/mapstructure v1.5.0 // indirect
|
||||
github.com/moby/term v0.0.0-20210619224110-3f7ff695adc6 // indirect
|
||||
|
|
8
go.sum
8
go.sum
|
@ -534,8 +534,8 @@ github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APP
|
|||
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
|
||||
github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 h1:jWpvCLoY8Z/e3VKvlsiIGKtc+UG6U5vzxaoagmhXfyg=
|
||||
github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0/go.mod h1:QUyp042oQthUoa9bqDv0ER0wrtXnBruoNd7aNjkbP+k=
|
||||
github.com/maxatome/go-testdeep v1.12.0 h1:Ql7Go8Tg0C1D/uMMX59LAoYK7LffeJQ6X2T04nTH68g=
|
||||
github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg=
|
||||
github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso=
|
||||
|
@ -658,8 +658,8 @@ github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8b
|
|||
github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
|
||||
github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc=
|
||||
github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls=
|
||||
github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY=
|
||||
github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY=
|
||||
github.com/prometheus/common v0.45.0 h1:2BGz0eBc2hdMDLnO/8n0jeB3oPrt2D08CekT0lneoxM=
|
||||
github.com/prometheus/common v0.45.0/go.mod h1:YJmSTw9BoKxJplESWWxlbyttQR4uaEcGyv9MZjVOJsY=
|
||||
github.com/prometheus/common/assets v0.2.0 h1:0P5OrzoHrYBOSM1OigWL3mY8ZvV2N4zIE/5AahrSrfM=
|
||||
github.com/prometheus/common/assets v0.2.0/go.mod h1:D17UVUE12bHbim7HzwUvtqm6gwBEaDQ0F+hIGbFbccI=
|
||||
github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4=
|
||||
|
|
|
@ -338,6 +338,34 @@ func (h *FloatHistogram) Equals(h2 *FloatHistogram) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
// Size returns the total size of the FloatHistogram, which includes the size of the pointer
|
||||
// to FloatHistogram, all its fields, and all elements contained in slices.
|
||||
// NOTE: this is only valid for 64 bit architectures.
|
||||
func (fh *FloatHistogram) Size() int {
|
||||
// Size of each slice separately.
|
||||
posSpanSize := len(fh.PositiveSpans) * 8 // 8 bytes (int32 + uint32).
|
||||
negSpanSize := len(fh.NegativeSpans) * 8 // 8 bytes (int32 + uint32).
|
||||
posBucketSize := len(fh.PositiveBuckets) * 8 // 8 bytes (float64).
|
||||
negBucketSize := len(fh.NegativeBuckets) * 8 // 8 bytes (float64).
|
||||
|
||||
// Total size of the struct.
|
||||
|
||||
// fh is 8 bytes.
|
||||
// fh.CounterResetHint is 4 bytes (1 byte bool + 3 bytes padding).
|
||||
// fh.Schema is 4 bytes.
|
||||
// fh.ZeroThreshold is 8 bytes.
|
||||
// fh.ZeroCount is 8 bytes.
|
||||
// fh.Count is 8 bytes.
|
||||
// fh.Sum is 8 bytes.
|
||||
// fh.PositiveSpans is 24 bytes.
|
||||
// fh.NegativeSpans is 24 bytes.
|
||||
// fh.PositiveBuckets is 24 bytes.
|
||||
// fh.NegativeBuckets is 24 bytes.
|
||||
structSize := 144
|
||||
|
||||
return structSize + posSpanSize + negSpanSize + posBucketSize + negBucketSize
|
||||
}
|
||||
|
||||
// Compact eliminates empty buckets at the beginning and end of each span, then
|
||||
// merges spans that are consecutive or at most maxEmptyBuckets apart, and
|
||||
// finally splits spans that contain more consecutive empty buckets than
|
||||
|
|
|
@ -2341,3 +2341,55 @@ func TestFloatHistogramEquals(t *testing.T) {
|
|||
notEquals(h1, *hNegBucketNaN)
|
||||
equals(*hNegBucketNaN, *hNegBucketNaN)
|
||||
}
|
||||
|
||||
func TestFloatHistogramSize(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
fh *FloatHistogram
|
||||
expected int
|
||||
}{
|
||||
{
|
||||
"without spans and buckets",
|
||||
&FloatHistogram{ // 8 bytes.
|
||||
CounterResetHint: 0, // 1 byte.
|
||||
Schema: 1, // 4 bytes.
|
||||
ZeroThreshold: 0.01, // 8 bytes.
|
||||
ZeroCount: 5.5, // 8 bytes.
|
||||
Count: 3493.3, // 8 bytes.
|
||||
Sum: 2349209.324, // 8 bytes.
|
||||
PositiveSpans: nil, // 24 bytes.
|
||||
PositiveBuckets: nil, // 24 bytes.
|
||||
NegativeSpans: nil, // 24 bytes.
|
||||
NegativeBuckets: nil, // 24 bytes.
|
||||
},
|
||||
8 + 4 + 4 + 8 + 8 + 8 + 8 + 24 + 24 + 24 + 24,
|
||||
},
|
||||
{
|
||||
"complete struct",
|
||||
&FloatHistogram{ // 8 bytes.
|
||||
CounterResetHint: 0, // 1 byte.
|
||||
Schema: 1, // 4 bytes.
|
||||
ZeroThreshold: 0.01, // 8 bytes.
|
||||
ZeroCount: 5.5, // 8 bytes.
|
||||
Count: 3493.3, // 8 bytes.
|
||||
Sum: 2349209.324, // 8 bytes.
|
||||
PositiveSpans: []Span{ // 24 bytes.
|
||||
{-2, 1}, // 2 * 4 bytes.
|
||||
{2, 3}, // 2 * 4 bytes.
|
||||
},
|
||||
PositiveBuckets: []float64{1, 3.3, 4.2, 0.1}, // 24 bytes + 4 * 8 bytes.
|
||||
NegativeSpans: []Span{ // 24 bytes.
|
||||
{3, 2}, // 2 * 4 bytes.
|
||||
{3, 2}}, // 2 * 4 bytes.
|
||||
NegativeBuckets: []float64{3.1, 3, 1.234e5, 1000}, // 24 bytes + 4 * 8 bytes.
|
||||
},
|
||||
8 + 4 + 4 + 8 + 8 + 8 + 8 + (24 + 2*4 + 2*4) + (24 + 2*4 + 2*4) + (24 + 4*8) + (24 + 4*8),
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
require.Equal(t, c.expected, c.fh.Size())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,6 +16,8 @@ package textparse
|
|||
import (
|
||||
"mime"
|
||||
|
||||
"github.com/gogo/protobuf/types"
|
||||
|
||||
"github.com/prometheus/prometheus/model/exemplar"
|
||||
"github.com/prometheus/prometheus/model/histogram"
|
||||
"github.com/prometheus/prometheus/model/labels"
|
||||
|
@ -64,6 +66,11 @@ type Parser interface {
|
|||
// retrieved (including the case where no exemplars exist at all).
|
||||
Exemplar(l *exemplar.Exemplar) bool
|
||||
|
||||
// CreatedTimestamp writes the created timestamp of the current sample
|
||||
// into the passed timestamp. It returns false if no created timestamp
|
||||
// exists or if the metric type does not support created timestamps.
|
||||
CreatedTimestamp(ct *types.Timestamp) bool
|
||||
|
||||
// Next advances the parser to the next sample. It returns false if no
|
||||
// more samples were read or an error occurred.
|
||||
Next() (Entry, error)
|
||||
|
|
|
@ -24,6 +24,8 @@ import (
|
|||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/gogo/protobuf/types"
|
||||
|
||||
"github.com/prometheus/prometheus/model/exemplar"
|
||||
"github.com/prometheus/prometheus/model/histogram"
|
||||
"github.com/prometheus/prometheus/model/labels"
|
||||
|
@ -211,6 +213,11 @@ func (p *OpenMetricsParser) Exemplar(e *exemplar.Exemplar) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
// CreatedTimestamp returns false because OpenMetricsParser does not support created timestamps (yet).
|
||||
func (p *OpenMetricsParser) CreatedTimestamp(_ *types.Timestamp) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// nextToken returns the next token from the openMetricsLexer.
|
||||
func (p *OpenMetricsParser) nextToken() token {
|
||||
tok := p.l.Lex()
|
||||
|
|
|
@ -26,6 +26,8 @@ import (
|
|||
"unicode/utf8"
|
||||
"unsafe"
|
||||
|
||||
"github.com/gogo/protobuf/types"
|
||||
|
||||
"github.com/prometheus/prometheus/model/exemplar"
|
||||
"github.com/prometheus/prometheus/model/histogram"
|
||||
"github.com/prometheus/prometheus/model/labels"
|
||||
|
@ -245,6 +247,11 @@ func (p *PromParser) Exemplar(*exemplar.Exemplar) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// CreatedTimestamp returns false because PromParser does not support created timestamps.
|
||||
func (p *PromParser) CreatedTimestamp(_ *types.Timestamp) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// nextToken returns the next token from the promlexer. It skips over tabs
|
||||
// and spaces.
|
||||
func (p *PromParser) nextToken() token {
|
||||
|
|
|
@ -23,6 +23,7 @@ import (
|
|||
"unicode/utf8"
|
||||
|
||||
"github.com/gogo/protobuf/proto"
|
||||
"github.com/gogo/protobuf/types"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/prometheus/common/model"
|
||||
|
||||
|
@ -147,9 +148,15 @@ func (p *ProtobufParser) Series() ([]byte, *int64, float64) {
|
|||
if ts != 0 {
|
||||
return p.metricBytes.Bytes(), &ts, v
|
||||
}
|
||||
// Nasty hack: Assume that ts==0 means no timestamp. That's not true in
|
||||
// general, but proto3 has no distinction between unset and
|
||||
// default. Need to avoid in the final format.
|
||||
// TODO(beorn7): We assume here that ts==0 means no timestamp. That's
|
||||
// not true in general, but proto3 originally has no distinction between
|
||||
// unset and default. At a later stage, the `optional` keyword was
|
||||
// (re-)introduced in proto3, but gogo-protobuf never got updated to
|
||||
// support it. (Note that setting `[(gogoproto.nullable) = true]` for
|
||||
// the `timestamp_ms` field doesn't help, either.) We plan to migrate
|
||||
// away from gogo-protobuf to an actively maintained protobuf
|
||||
// implementation. Once that's done, we can simply use the `optional`
|
||||
// keyword and check for the unset state explicitly.
|
||||
return p.metricBytes.Bytes(), nil, v
|
||||
}
|
||||
|
||||
|
@ -347,6 +354,24 @@ func (p *ProtobufParser) Exemplar(ex *exemplar.Exemplar) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
func (p *ProtobufParser) CreatedTimestamp(ct *types.Timestamp) bool {
|
||||
var foundCT *types.Timestamp
|
||||
switch p.mf.GetType() {
|
||||
case dto.MetricType_COUNTER:
|
||||
foundCT = p.mf.GetMetric()[p.metricPos].GetCounter().GetCreatedTimestamp()
|
||||
case dto.MetricType_SUMMARY:
|
||||
foundCT = p.mf.GetMetric()[p.metricPos].GetSummary().GetCreatedTimestamp()
|
||||
case dto.MetricType_HISTOGRAM, dto.MetricType_GAUGE_HISTOGRAM:
|
||||
foundCT = p.mf.GetMetric()[p.metricPos].GetHistogram().GetCreatedTimestamp()
|
||||
default:
|
||||
}
|
||||
if foundCT == nil {
|
||||
return false
|
||||
}
|
||||
*ct = *foundCT
|
||||
return true
|
||||
}
|
||||
|
||||
// Next advances the parser to the next "sample" (emulating the behavior of a
|
||||
// text format parser). It returns (EntryInvalid, io.EOF) if no samples were
|
||||
// read.
|
||||
|
|
|
@ -21,6 +21,7 @@ import (
|
|||
"testing"
|
||||
|
||||
"github.com/gogo/protobuf/proto"
|
||||
"github.com/gogo/protobuf/types"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/prometheus/prometheus/model/exemplar"
|
||||
|
@ -530,6 +531,69 @@ metric: <
|
|||
>
|
||||
>
|
||||
|
||||
`,
|
||||
`name: "test_counter_with_createdtimestamp"
|
||||
help: "A counter with a created timestamp."
|
||||
type: COUNTER
|
||||
metric: <
|
||||
counter: <
|
||||
value: 42
|
||||
created_timestamp: <
|
||||
seconds: 1
|
||||
nanos: 1
|
||||
>
|
||||
>
|
||||
>
|
||||
|
||||
`,
|
||||
`name: "test_summary_with_createdtimestamp"
|
||||
help: "A summary with a created timestamp."
|
||||
type: SUMMARY
|
||||
metric: <
|
||||
summary: <
|
||||
sample_count: 42
|
||||
sample_sum: 1.234
|
||||
created_timestamp: <
|
||||
seconds: 1
|
||||
nanos: 1
|
||||
>
|
||||
>
|
||||
>
|
||||
|
||||
`,
|
||||
`name: "test_histogram_with_createdtimestamp"
|
||||
help: "A histogram with a created timestamp."
|
||||
type: HISTOGRAM
|
||||
metric: <
|
||||
histogram: <
|
||||
created_timestamp: <
|
||||
seconds: 1
|
||||
nanos: 1
|
||||
>
|
||||
positive_span: <
|
||||
offset: 0
|
||||
length: 0
|
||||
>
|
||||
>
|
||||
>
|
||||
|
||||
`,
|
||||
`name: "test_gaugehistogram_with_createdtimestamp"
|
||||
help: "A gauge histogram with a created timestamp."
|
||||
type: GAUGE_HISTOGRAM
|
||||
metric: <
|
||||
histogram: <
|
||||
created_timestamp: <
|
||||
seconds: 1
|
||||
nanos: 1
|
||||
>
|
||||
positive_span: <
|
||||
offset: 0
|
||||
length: 0
|
||||
>
|
||||
>
|
||||
>
|
||||
|
||||
`,
|
||||
}
|
||||
|
||||
|
@ -566,6 +630,7 @@ func TestProtobufParse(t *testing.T) {
|
|||
shs *histogram.Histogram
|
||||
fhs *histogram.FloatHistogram
|
||||
e []exemplar.Exemplar
|
||||
ct *types.Timestamp
|
||||
}
|
||||
|
||||
inputBuf := createTestProtoBuf(t)
|
||||
|
@ -997,6 +1062,86 @@ func TestProtobufParse(t *testing.T) {
|
|||
"__name__", "empty_histogram",
|
||||
),
|
||||
},
|
||||
{
|
||||
m: "test_counter_with_createdtimestamp",
|
||||
help: "A counter with a created timestamp.",
|
||||
},
|
||||
{
|
||||
m: "test_counter_with_createdtimestamp",
|
||||
typ: MetricTypeCounter,
|
||||
},
|
||||
{
|
||||
m: "test_counter_with_createdtimestamp",
|
||||
v: 42,
|
||||
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
|
||||
lset: labels.FromStrings(
|
||||
"__name__", "test_counter_with_createdtimestamp",
|
||||
),
|
||||
},
|
||||
{
|
||||
m: "test_summary_with_createdtimestamp",
|
||||
help: "A summary with a created timestamp.",
|
||||
},
|
||||
{
|
||||
m: "test_summary_with_createdtimestamp",
|
||||
typ: MetricTypeSummary,
|
||||
},
|
||||
{
|
||||
m: "test_summary_with_createdtimestamp_count",
|
||||
v: 42,
|
||||
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
|
||||
lset: labels.FromStrings(
|
||||
"__name__", "test_summary_with_createdtimestamp_count",
|
||||
),
|
||||
},
|
||||
{
|
||||
m: "test_summary_with_createdtimestamp_sum",
|
||||
v: 1.234,
|
||||
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
|
||||
lset: labels.FromStrings(
|
||||
"__name__", "test_summary_with_createdtimestamp_sum",
|
||||
),
|
||||
},
|
||||
{
|
||||
m: "test_histogram_with_createdtimestamp",
|
||||
help: "A histogram with a created timestamp.",
|
||||
},
|
||||
{
|
||||
m: "test_histogram_with_createdtimestamp",
|
||||
typ: MetricTypeHistogram,
|
||||
},
|
||||
{
|
||||
m: "test_histogram_with_createdtimestamp",
|
||||
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
|
||||
shs: &histogram.Histogram{
|
||||
CounterResetHint: histogram.UnknownCounterReset,
|
||||
PositiveSpans: []histogram.Span{},
|
||||
NegativeSpans: []histogram.Span{},
|
||||
},
|
||||
lset: labels.FromStrings(
|
||||
"__name__", "test_histogram_with_createdtimestamp",
|
||||
),
|
||||
},
|
||||
{
|
||||
m: "test_gaugehistogram_with_createdtimestamp",
|
||||
help: "A gauge histogram with a created timestamp.",
|
||||
},
|
||||
{
|
||||
m: "test_gaugehistogram_with_createdtimestamp",
|
||||
typ: MetricTypeGaugeHistogram,
|
||||
},
|
||||
{
|
||||
m: "test_gaugehistogram_with_createdtimestamp",
|
||||
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
|
||||
shs: &histogram.Histogram{
|
||||
CounterResetHint: histogram.GaugeType,
|
||||
PositiveSpans: []histogram.Span{},
|
||||
NegativeSpans: []histogram.Span{},
|
||||
},
|
||||
lset: labels.FromStrings(
|
||||
"__name__", "test_gaugehistogram_with_createdtimestamp",
|
||||
),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -1739,6 +1884,86 @@ func TestProtobufParse(t *testing.T) {
|
|||
"__name__", "empty_histogram",
|
||||
),
|
||||
},
|
||||
{ // 81
|
||||
m: "test_counter_with_createdtimestamp",
|
||||
help: "A counter with a created timestamp.",
|
||||
},
|
||||
{ // 82
|
||||
m: "test_counter_with_createdtimestamp",
|
||||
typ: MetricTypeCounter,
|
||||
},
|
||||
{ // 83
|
||||
m: "test_counter_with_createdtimestamp",
|
||||
v: 42,
|
||||
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
|
||||
lset: labels.FromStrings(
|
||||
"__name__", "test_counter_with_createdtimestamp",
|
||||
),
|
||||
},
|
||||
{ // 84
|
||||
m: "test_summary_with_createdtimestamp",
|
||||
help: "A summary with a created timestamp.",
|
||||
},
|
||||
{ // 85
|
||||
m: "test_summary_with_createdtimestamp",
|
||||
typ: MetricTypeSummary,
|
||||
},
|
||||
{ // 86
|
||||
m: "test_summary_with_createdtimestamp_count",
|
||||
v: 42,
|
||||
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
|
||||
lset: labels.FromStrings(
|
||||
"__name__", "test_summary_with_createdtimestamp_count",
|
||||
),
|
||||
},
|
||||
{ // 87
|
||||
m: "test_summary_with_createdtimestamp_sum",
|
||||
v: 1.234,
|
||||
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
|
||||
lset: labels.FromStrings(
|
||||
"__name__", "test_summary_with_createdtimestamp_sum",
|
||||
),
|
||||
},
|
||||
{ // 88
|
||||
m: "test_histogram_with_createdtimestamp",
|
||||
help: "A histogram with a created timestamp.",
|
||||
},
|
||||
{ // 89
|
||||
m: "test_histogram_with_createdtimestamp",
|
||||
typ: MetricTypeHistogram,
|
||||
},
|
||||
{ // 90
|
||||
m: "test_histogram_with_createdtimestamp",
|
||||
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
|
||||
shs: &histogram.Histogram{
|
||||
CounterResetHint: histogram.UnknownCounterReset,
|
||||
PositiveSpans: []histogram.Span{},
|
||||
NegativeSpans: []histogram.Span{},
|
||||
},
|
||||
lset: labels.FromStrings(
|
||||
"__name__", "test_histogram_with_createdtimestamp",
|
||||
),
|
||||
},
|
||||
{ // 91
|
||||
m: "test_gaugehistogram_with_createdtimestamp",
|
||||
help: "A gauge histogram with a created timestamp.",
|
||||
},
|
||||
{ // 92
|
||||
m: "test_gaugehistogram_with_createdtimestamp",
|
||||
typ: MetricTypeGaugeHistogram,
|
||||
},
|
||||
{ // 93
|
||||
m: "test_gaugehistogram_with_createdtimestamp",
|
||||
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
|
||||
shs: &histogram.Histogram{
|
||||
CounterResetHint: histogram.GaugeType,
|
||||
PositiveSpans: []histogram.Span{},
|
||||
NegativeSpans: []histogram.Span{},
|
||||
},
|
||||
lset: labels.FromStrings(
|
||||
"__name__", "test_gaugehistogram_with_createdtimestamp",
|
||||
),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
@ -1764,8 +1989,10 @@ func TestProtobufParse(t *testing.T) {
|
|||
m, ts, v := p.Series()
|
||||
|
||||
var e exemplar.Exemplar
|
||||
var ct types.Timestamp
|
||||
p.Metric(&res)
|
||||
found := p.Exemplar(&e)
|
||||
eFound := p.Exemplar(&e)
|
||||
ctFound := p.CreatedTimestamp(&ct)
|
||||
require.Equal(t, exp[i].m, string(m), "i: %d", i)
|
||||
if ts != nil {
|
||||
require.Equal(t, exp[i].t, *ts, "i: %d", i)
|
||||
|
@ -1775,12 +2002,18 @@ func TestProtobufParse(t *testing.T) {
|
|||
require.Equal(t, exp[i].v, v, "i: %d", i)
|
||||
require.Equal(t, exp[i].lset, res, "i: %d", i)
|
||||
if len(exp[i].e) == 0 {
|
||||
require.Equal(t, false, found, "i: %d", i)
|
||||
require.Equal(t, false, eFound, "i: %d", i)
|
||||
} else {
|
||||
require.Equal(t, true, found, "i: %d", i)
|
||||
require.Equal(t, true, eFound, "i: %d", i)
|
||||
require.Equal(t, exp[i].e[0], e, "i: %d", i)
|
||||
require.False(t, p.Exemplar(&e), "too many exemplars returned, i: %d", i)
|
||||
}
|
||||
if exp[i].ct != nil {
|
||||
require.Equal(t, true, ctFound, "i: %d", i)
|
||||
require.Equal(t, exp[i].ct.String(), ct.String(), "i: %d", i)
|
||||
} else {
|
||||
require.Equal(t, false, ctFound, "i: %d", i)
|
||||
}
|
||||
|
||||
case EntryHistogram:
|
||||
m, ts, shs, fhs := p.Histogram()
|
||||
|
|
|
@ -965,68 +965,67 @@ func init() {
|
|||
}
|
||||
|
||||
var fileDescriptor_d1e5ddb18987a258 = []byte{
|
||||
// 963 bytes of a gzipped FileDescriptorProto
|
||||
// 960 bytes of a gzipped FileDescriptorProto
|
||||
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xa4, 0x56, 0xdd, 0x6e, 0x1b, 0x45,
|
||||
0x14, 0xee, 0x76, 0xfd, 0x93, 0x3d, 0x8e, 0x93, 0xcd, 0x60, 0x55, 0xab, 0x40, 0x62, 0xb3, 0x12,
|
||||
0x52, 0x40, 0xc8, 0x16, 0x50, 0x04, 0x2a, 0x45, 0x22, 0x69, 0xd3, 0x14, 0x15, 0xb7, 0x65, 0x6c,
|
||||
0x5f, 0x94, 0x9b, 0xd5, 0xd8, 0x9e, 0xac, 0x57, 0xec, 0xee, 0x2c, 0xfb, 0x53, 0x11, 0xee, 0x79,
|
||||
0x06, 0x5e, 0x01, 0xf1, 0x1c, 0x08, 0xf5, 0x92, 0x07, 0x40, 0x08, 0xe5, 0x49, 0xd0, 0xfc, 0xed,
|
||||
0x3a, 0xd5, 0xba, 0x90, 0xf6, 0x6e, 0xe6, 0xf3, 0x77, 0xce, 0x7c, 0xe7, 0x9b, 0xf1, 0x39, 0x0b,
|
||||
0x6e, 0xc0, 0x46, 0x49, 0xca, 0x22, 0x9a, 0xaf, 0x68, 0x91, 0x8d, 0x16, 0x61, 0x40, 0xe3, 0x7c,
|
||||
0x14, 0xd1, 0x3c, 0x0d, 0x16, 0xd9, 0x30, 0x49, 0x59, 0xce, 0x50, 0x2f, 0x60, 0xc3, 0x8a, 0x33,
|
||||
0x94, 0x9c, 0xfd, 0x9e, 0xcf, 0x7c, 0x26, 0x08, 0x23, 0xbe, 0x92, 0xdc, 0xfd, 0xbe, 0xcf, 0x98,
|
||||
0x1f, 0xd2, 0x91, 0xd8, 0xcd, 0x8b, 0xf3, 0x51, 0x1e, 0x44, 0x34, 0xcb, 0x49, 0x94, 0x48, 0x82,
|
||||
0xfb, 0x29, 0x58, 0xdf, 0x90, 0x39, 0x0d, 0x9f, 0x92, 0x20, 0x45, 0x08, 0x1a, 0x31, 0x89, 0xa8,
|
||||
0x63, 0x0c, 0x8c, 0x23, 0x0b, 0x8b, 0x35, 0xea, 0x41, 0xf3, 0x39, 0x09, 0x0b, 0xea, 0xdc, 0x14,
|
||||
0xa0, 0xdc, 0xb8, 0x07, 0xd0, 0x3c, 0x23, 0x85, 0xbf, 0xf6, 0x33, 0x8f, 0x31, 0xf4, 0xcf, 0xbf,
|
||||
0x19, 0xd0, 0xbe, 0xc7, 0x8a, 0x38, 0xa7, 0x69, 0x3d, 0x03, 0xdd, 0x81, 0x2d, 0xfa, 0x23, 0x8d,
|
||||
0x92, 0x90, 0xa4, 0x22, 0x73, 0xe7, 0xe3, 0xc3, 0x61, 0x5d, 0x5d, 0xc3, 0x53, 0xc5, 0xc2, 0x25,
|
||||
0x1f, 0x8d, 0x61, 0x6f, 0x91, 0x52, 0x92, 0xd3, 0xa5, 0x57, 0x96, 0xe3, 0x98, 0x22, 0xc9, 0xfe,
|
||||
0x50, 0x16, 0x3c, 0xd4, 0x05, 0x0f, 0xa7, 0x9a, 0x71, 0xd2, 0x78, 0xf1, 0x77, 0xdf, 0xc0, 0xb6,
|
||||
0x0a, 0x2d, 0x71, 0xf7, 0x2e, 0x6c, 0x7d, 0x5b, 0x90, 0x38, 0x0f, 0x42, 0x8a, 0xf6, 0x61, 0xeb,
|
||||
0x07, 0xb5, 0x56, 0x7a, 0xcb, 0xfd, 0x55, 0x27, 0xca, 0x52, 0xff, 0x32, 0xa0, 0x3d, 0x29, 0xa2,
|
||||
0x88, 0xa4, 0x17, 0xe8, 0x5d, 0xd8, 0xce, 0x48, 0x94, 0x84, 0xd4, 0x5b, 0xf0, 0xe2, 0x45, 0x86,
|
||||
0x06, 0xee, 0x48, 0x4c, 0xf8, 0x81, 0x0e, 0x00, 0x14, 0x25, 0x2b, 0x22, 0x95, 0xc9, 0x92, 0xc8,
|
||||
0xa4, 0x88, 0xd0, 0x57, 0x6b, 0xe7, 0x9b, 0x03, 0x73, 0xb3, 0x2d, 0x5a, 0xb1, 0xa8, 0xea, 0xc6,
|
||||
0x9a, 0xca, 0x5a, 0x73, 0x1a, 0xaf, 0x6d, 0x4e, 0x1f, 0xda, 0xb3, 0x38, 0xbf, 0x48, 0xe8, 0x72,
|
||||
0xc3, 0x55, 0xff, 0xde, 0x04, 0xeb, 0x61, 0x90, 0xe5, 0xcc, 0x4f, 0x49, 0xf4, 0x7f, 0x1c, 0xf8,
|
||||
0x10, 0xd0, 0x3a, 0xc5, 0x3b, 0x0f, 0x19, 0xc9, 0x85, 0x42, 0x03, 0xdb, 0x6b, 0xc4, 0x07, 0x1c,
|
||||
0xff, 0x2f, 0xbf, 0xee, 0x40, 0x6b, 0x5e, 0x2c, 0xbe, 0xa7, 0xb9, 0x72, 0xeb, 0x9d, 0x7a, 0xb7,
|
||||
0x4e, 0x04, 0x47, 0x79, 0xa5, 0x22, 0xea, 0x9d, 0xda, 0x7d, 0x5d, 0xa7, 0xd0, 0x2d, 0x68, 0x65,
|
||||
0x8b, 0x15, 0x8d, 0x88, 0xd3, 0x1c, 0x18, 0x47, 0x7b, 0x58, 0xed, 0xd0, 0x7b, 0xb0, 0xf3, 0x13,
|
||||
0x4d, 0x99, 0x97, 0xaf, 0x52, 0x9a, 0xad, 0x58, 0xb8, 0x74, 0x5a, 0xa2, 0x8a, 0x2e, 0x47, 0xa7,
|
||||
0x1a, 0xe4, 0x85, 0x0a, 0x9a, 0xf4, 0xad, 0x2d, 0x7c, 0xb3, 0x38, 0x22, 0x5d, 0x3b, 0x02, 0xbb,
|
||||
0xfa, 0x59, 0x79, 0xb6, 0x25, 0xf2, 0xec, 0x94, 0x24, 0xe9, 0xd8, 0x23, 0xe8, 0xc6, 0xd4, 0x27,
|
||||
0x79, 0xf0, 0x9c, 0x7a, 0x59, 0x42, 0x62, 0xc7, 0x12, 0xce, 0x0c, 0x5e, 0xe5, 0xcc, 0x24, 0x21,
|
||||
0xb1, 0x72, 0x67, 0x5b, 0x07, 0x73, 0x8c, 0x8b, 0x2f, 0x93, 0x2d, 0x69, 0x98, 0x13, 0x07, 0x06,
|
||||
0xe6, 0x11, 0xc2, 0xe5, 0x11, 0xf7, 0x39, 0x78, 0x85, 0x26, 0x0b, 0xe8, 0x0c, 0x4c, 0x5e, 0xa3,
|
||||
0x46, 0x65, 0x11, 0x8f, 0xa0, 0x9b, 0xb0, 0x2c, 0xa8, 0xa4, 0x6d, 0x5f, 0x4f, 0x9a, 0x0e, 0xd6,
|
||||
0xd2, 0xca, 0x64, 0x52, 0x5a, 0x57, 0x4a, 0xd3, 0x68, 0x29, 0xad, 0xa4, 0x49, 0x69, 0x3b, 0x52,
|
||||
0x9a, 0x46, 0x85, 0x34, 0xf7, 0x0f, 0x03, 0x5a, 0xf2, 0x40, 0xf4, 0x3e, 0xd8, 0x8b, 0x22, 0x2a,
|
||||
0xc2, 0xf5, 0x72, 0xe4, 0x3b, 0xde, 0xad, 0x70, 0x59, 0xd0, 0x6d, 0xb8, 0xf5, 0x32, 0xf5, 0xca,
|
||||
0x7b, 0xee, 0xbd, 0x14, 0x20, 0x6f, 0xa8, 0x0f, 0x9d, 0x22, 0x49, 0x68, 0xea, 0xcd, 0x59, 0x11,
|
||||
0x2f, 0xd5, 0xa3, 0x06, 0x01, 0x9d, 0x70, 0xe4, 0x4a, 0x73, 0x34, 0xaf, 0xd7, 0x1c, 0xdd, 0xbb,
|
||||
0x00, 0x95, 0x71, 0xfc, 0x51, 0xb2, 0xf3, 0xf3, 0x8c, 0xca, 0x0a, 0xf6, 0xb0, 0xda, 0x71, 0x3c,
|
||||
0xa4, 0xb1, 0x9f, 0xaf, 0xc4, 0xe9, 0x5d, 0xac, 0x76, 0xee, 0x2f, 0x06, 0x6c, 0xe9, 0xa4, 0xe8,
|
||||
0x0b, 0x68, 0x86, 0x7c, 0x36, 0x38, 0x86, 0xb8, 0xa6, 0x7e, 0xbd, 0x86, 0x72, 0x7c, 0xa8, 0x5b,
|
||||
0x92, 0x31, 0xf5, 0xdd, 0x12, 0x7d, 0x0e, 0xd6, 0x35, 0x5a, 0x36, 0xae, 0xc8, 0xee, 0xcf, 0x26,
|
||||
0xb4, 0xc6, 0x62, 0x0e, 0xbe, 0x99, 0xae, 0x8f, 0xa0, 0xe9, 0xf3, 0xc9, 0xa5, 0xa6, 0xce, 0xdb,
|
||||
0xf5, 0xc1, 0x62, 0xb8, 0x61, 0xc9, 0x44, 0x9f, 0x41, 0x7b, 0x21, 0x87, 0x99, 0x92, 0x7c, 0x50,
|
||||
0x1f, 0xa4, 0x26, 0x1e, 0xd6, 0x6c, 0x1e, 0x98, 0xc9, 0xd1, 0xa0, 0x3a, 0xf0, 0x86, 0x40, 0x35,
|
||||
0x3f, 0xb0, 0x66, 0xf3, 0xc0, 0x42, 0x76, 0x5d, 0xd1, 0x4c, 0x36, 0x06, 0xaa, 0xd6, 0x8c, 0x35,
|
||||
0x1b, 0x7d, 0x09, 0xd6, 0x4a, 0x37, 0x63, 0xd1, 0x44, 0x36, 0xda, 0x53, 0xf6, 0x6c, 0x5c, 0x45,
|
||||
0xf0, 0xf6, 0x5d, 0x3a, 0xee, 0x45, 0x99, 0xe8, 0x54, 0x26, 0xee, 0x94, 0xd8, 0x38, 0x73, 0x7f,
|
||||
0x35, 0x60, 0x5b, 0xde, 0xc3, 0x03, 0x12, 0x05, 0xe1, 0x45, 0xed, 0x47, 0x03, 0x82, 0xc6, 0x8a,
|
||||
0x86, 0x89, 0xfa, 0x66, 0x10, 0x6b, 0x74, 0x1b, 0x1a, 0x5c, 0xa3, 0xb0, 0x70, 0x67, 0xd3, 0x7f,
|
||||
0x5e, 0x66, 0x9e, 0x5e, 0x24, 0x14, 0x0b, 0x36, 0x6f, 0xf0, 0xf2, 0xeb, 0xc7, 0x69, 0xbc, 0xaa,
|
||||
0xc1, 0xcb, 0x38, 0xdd, 0xe0, 0x65, 0xc4, 0x07, 0x73, 0x80, 0x2a, 0x1f, 0xea, 0x40, 0xfb, 0xde,
|
||||
0x93, 0xd9, 0xe3, 0xe9, 0x29, 0xb6, 0x6f, 0x20, 0x0b, 0x9a, 0x67, 0xc7, 0xb3, 0xb3, 0x53, 0xdb,
|
||||
0xe0, 0xf8, 0x64, 0x36, 0x1e, 0x1f, 0xe3, 0x67, 0xf6, 0x4d, 0xbe, 0x99, 0x3d, 0x9e, 0x3e, 0x7b,
|
||||
0x7a, 0x7a, 0xdf, 0x36, 0x51, 0x17, 0xac, 0x87, 0x5f, 0x4f, 0xa6, 0x4f, 0xce, 0xf0, 0xf1, 0xd8,
|
||||
0x6e, 0xa0, 0xb7, 0x60, 0x57, 0xc4, 0x78, 0x15, 0xd8, 0x3c, 0x71, 0x5f, 0x5c, 0x1e, 0x1a, 0x7f,
|
||||
0x5e, 0x1e, 0x1a, 0xff, 0x5c, 0x1e, 0x1a, 0xdf, 0xf5, 0x02, 0xe6, 0x55, 0xe2, 0x3c, 0x29, 0x6e,
|
||||
0xde, 0x12, 0x2f, 0xfb, 0x93, 0x7f, 0x03, 0x00, 0x00, 0xff, 0xff, 0x68, 0x3f, 0xd9, 0x07, 0xdd,
|
||||
0x09, 0x00, 0x00,
|
||||
0x14, 0xee, 0xd6, 0xbf, 0x7b, 0x1c, 0x27, 0x9b, 0xc1, 0xaa, 0x56, 0x81, 0xc4, 0x66, 0x25, 0xa4,
|
||||
0x80, 0x90, 0x2d, 0xa0, 0x08, 0x54, 0x8a, 0x44, 0xd2, 0xa6, 0x2e, 0x2a, 0x6e, 0xcb, 0xd8, 0xbe,
|
||||
0x28, 0x37, 0xab, 0xb1, 0x3d, 0x59, 0xaf, 0xd8, 0xdd, 0x59, 0xf6, 0xa7, 0x22, 0xdc, 0xf3, 0x0c,
|
||||
0xbc, 0x00, 0x17, 0x3c, 0x05, 0x97, 0xa8, 0x97, 0x5c, 0x71, 0x89, 0x50, 0x9e, 0x04, 0xcd, 0xdf,
|
||||
0xae, 0x53, 0xad, 0x03, 0x81, 0xbb, 0x99, 0xcf, 0xdf, 0x39, 0xf3, 0x9d, 0x6f, 0xc6, 0xe7, 0x2c,
|
||||
0x38, 0x3e, 0x1b, 0xc5, 0x09, 0x0b, 0x69, 0xb6, 0xa6, 0x79, 0x3a, 0x5a, 0x06, 0x3e, 0x8d, 0xb2,
|
||||
0x51, 0x48, 0xb3, 0xc4, 0x5f, 0xa6, 0xc3, 0x38, 0x61, 0x19, 0x43, 0x3d, 0x9f, 0x0d, 0x4b, 0xce,
|
||||
0x50, 0x72, 0x0e, 0x7a, 0x1e, 0xf3, 0x98, 0x20, 0x8c, 0xf8, 0x4a, 0x72, 0x0f, 0xfa, 0x1e, 0x63,
|
||||
0x5e, 0x40, 0x47, 0x62, 0xb7, 0xc8, 0xcf, 0x47, 0x99, 0x1f, 0xd2, 0x34, 0x23, 0x61, 0x2c, 0x09,
|
||||
0xce, 0xc7, 0x60, 0x7e, 0x45, 0x16, 0x34, 0x78, 0x4e, 0xfc, 0x04, 0x21, 0xa8, 0x47, 0x24, 0xa4,
|
||||
0xb6, 0x31, 0x30, 0x8e, 0x4d, 0x2c, 0xd6, 0xa8, 0x07, 0x8d, 0x97, 0x24, 0xc8, 0xa9, 0x7d, 0x5b,
|
||||
0x80, 0x72, 0xe3, 0x1c, 0x42, 0x63, 0x4c, 0x72, 0x6f, 0xe3, 0x67, 0x1e, 0x63, 0xe8, 0x9f, 0x7f,
|
||||
0x36, 0xa0, 0xf5, 0x80, 0xe5, 0x51, 0x46, 0x93, 0x6a, 0x06, 0xba, 0x07, 0x6d, 0xfa, 0x3d, 0x0d,
|
||||
0xe3, 0x80, 0x24, 0x22, 0x73, 0xe7, 0xc3, 0xa3, 0x61, 0x55, 0x5d, 0xc3, 0x33, 0xc5, 0xc2, 0x05,
|
||||
0x1f, 0x8d, 0x61, 0x7f, 0x99, 0x50, 0x92, 0xd1, 0x95, 0x5b, 0x94, 0x63, 0xd7, 0x44, 0x92, 0x83,
|
||||
0xa1, 0x2c, 0x78, 0xa8, 0x0b, 0x1e, 0xce, 0x34, 0x03, 0x5b, 0x2a, 0xa8, 0x40, 0x9c, 0xfb, 0xd0,
|
||||
0xfe, 0x3a, 0x27, 0x51, 0xe6, 0x07, 0x14, 0x1d, 0x40, 0xfb, 0x3b, 0xb5, 0x56, 0x4a, 0x8b, 0xfd,
|
||||
0x55, 0x0f, 0x8a, 0x22, 0xff, 0x30, 0xa0, 0x35, 0xcd, 0xc3, 0x90, 0x24, 0x17, 0xe8, 0x6d, 0xd8,
|
||||
0x49, 0x49, 0x18, 0x07, 0xd4, 0x5d, 0xf2, 0xb2, 0x45, 0x86, 0x3a, 0xee, 0x48, 0x4c, 0x38, 0x81,
|
||||
0x0e, 0x01, 0x14, 0x25, 0xcd, 0x43, 0x95, 0xc9, 0x94, 0xc8, 0x34, 0x0f, 0xd1, 0x17, 0x1b, 0xe7,
|
||||
0xd7, 0x06, 0xb5, 0xed, 0x86, 0x68, 0xc5, 0xa7, 0xf5, 0x57, 0x7f, 0xf6, 0x6f, 0x6d, 0xa8, 0xac,
|
||||
0xb4, 0xa5, 0xfe, 0x1f, 0x6c, 0xe9, 0x43, 0x6b, 0x1e, 0x65, 0x17, 0x31, 0x5d, 0x6d, 0xb9, 0xde,
|
||||
0x5f, 0x1b, 0x60, 0x3e, 0xf6, 0xd3, 0x8c, 0x79, 0x09, 0x09, 0xff, 0x4d, 0xed, 0xef, 0x03, 0xda,
|
||||
0xa4, 0xb8, 0xe7, 0x01, 0x23, 0x99, 0xd0, 0x66, 0x60, 0x6b, 0x83, 0xf8, 0x88, 0xe3, 0xff, 0xe4,
|
||||
0xd4, 0x3d, 0x68, 0x2e, 0xf2, 0xe5, 0xb7, 0x34, 0x53, 0x3e, 0xbd, 0x55, 0xed, 0xd3, 0xa9, 0xe0,
|
||||
0x28, 0x97, 0x54, 0x44, 0xb5, 0x47, 0x7b, 0x37, 0xf7, 0x08, 0xdd, 0x81, 0x66, 0xba, 0x5c, 0xd3,
|
||||
0x90, 0xd8, 0x8d, 0x81, 0x71, 0xbc, 0x8f, 0xd5, 0x0e, 0xbd, 0x03, 0xbb, 0x3f, 0xd0, 0x84, 0xb9,
|
||||
0xd9, 0x3a, 0xa1, 0xe9, 0x9a, 0x05, 0x2b, 0xbb, 0x29, 0xf4, 0x77, 0x39, 0x3a, 0xd3, 0x20, 0x2f,
|
||||
0x51, 0xd0, 0xa4, 0x63, 0x2d, 0xe1, 0x98, 0xc9, 0x11, 0xe9, 0xd7, 0x31, 0x58, 0xe5, 0xcf, 0xca,
|
||||
0xad, 0xb6, 0xc8, 0xb3, 0x5b, 0x90, 0xa4, 0x57, 0x4f, 0xa0, 0x1b, 0x51, 0x8f, 0x64, 0xfe, 0x4b,
|
||||
0xea, 0xa6, 0x31, 0x89, 0x6c, 0x53, 0x78, 0x32, 0xb8, 0xce, 0x93, 0x69, 0x4c, 0x22, 0xe5, 0xcb,
|
||||
0x8e, 0x0e, 0xe6, 0x18, 0x17, 0x5f, 0x24, 0x5b, 0xd1, 0x20, 0x23, 0x36, 0x0c, 0x6a, 0xc7, 0x08,
|
||||
0x17, 0x47, 0x3c, 0xe4, 0xe0, 0x15, 0x9a, 0x2c, 0xa0, 0x33, 0xa8, 0xf1, 0x1a, 0x35, 0x2a, 0x8b,
|
||||
0x78, 0x02, 0xdd, 0x98, 0xa5, 0x7e, 0x29, 0x6d, 0xe7, 0x66, 0xd2, 0x74, 0xb0, 0x96, 0x56, 0x24,
|
||||
0x93, 0xd2, 0xba, 0x52, 0x9a, 0x46, 0x0b, 0x69, 0x05, 0x4d, 0x4a, 0xdb, 0x95, 0xd2, 0x34, 0x2a,
|
||||
0xa4, 0x39, 0xbf, 0x19, 0xd0, 0x94, 0x07, 0xa2, 0x77, 0xc1, 0x5a, 0xe6, 0x61, 0x1e, 0x6c, 0x96,
|
||||
0x23, 0x5f, 0xf0, 0x5e, 0x89, 0xcb, 0x82, 0xee, 0xc2, 0x9d, 0xd7, 0xa9, 0x57, 0x5e, 0x72, 0xef,
|
||||
0xb5, 0x00, 0x79, 0x43, 0x7d, 0xe8, 0xe4, 0x71, 0x4c, 0x13, 0x77, 0xc1, 0xf2, 0x68, 0xa5, 0x9e,
|
||||
0x33, 0x08, 0xe8, 0x94, 0x23, 0x57, 0x5a, 0x61, 0xed, 0x66, 0xad, 0xd0, 0xb9, 0x0f, 0x50, 0x1a,
|
||||
0xc7, 0x1f, 0x25, 0x3b, 0x3f, 0x4f, 0xa9, 0xac, 0x60, 0x1f, 0xab, 0x1d, 0xc7, 0x03, 0x1a, 0x79,
|
||||
0xd9, 0x5a, 0x9c, 0xde, 0xc5, 0x6a, 0xe7, 0xfc, 0x64, 0x40, 0x5b, 0x27, 0x45, 0x9f, 0x41, 0x23,
|
||||
0xe0, 0x93, 0xc0, 0x36, 0xc4, 0x35, 0xf5, 0xab, 0x35, 0x14, 0xc3, 0x42, 0xdd, 0x92, 0x8c, 0xa9,
|
||||
0xee, 0x90, 0xe8, 0x53, 0x30, 0x6f, 0xd2, 0xa0, 0x4b, 0xb2, 0xf3, 0x63, 0x0d, 0x9a, 0x13, 0x31,
|
||||
0xf5, 0xfe, 0x9f, 0xae, 0x0f, 0xa0, 0xe1, 0xf1, 0x39, 0xa5, 0x66, 0xcc, 0x9b, 0xd5, 0xc1, 0x62,
|
||||
0x94, 0x61, 0xc9, 0x44, 0x9f, 0x40, 0x6b, 0x29, 0x47, 0x97, 0x92, 0x7c, 0x58, 0x1d, 0xa4, 0xe6,
|
||||
0x1b, 0xd6, 0x6c, 0x1e, 0x98, 0xca, 0x71, 0xa0, 0xba, 0xee, 0x96, 0x40, 0x35, 0x33, 0xb0, 0x66,
|
||||
0xf3, 0xc0, 0x5c, 0xf6, 0x5b, 0xd1, 0x4c, 0xb6, 0x06, 0xaa, 0xa6, 0x8c, 0x35, 0x1b, 0x7d, 0x0e,
|
||||
0xe6, 0x5a, 0xb7, 0x61, 0xd1, 0x44, 0xb6, 0xda, 0x53, 0x74, 0x6b, 0x5c, 0x46, 0xf0, 0xc6, 0x5d,
|
||||
0x38, 0xee, 0x86, 0xa9, 0xe8, 0x54, 0x35, 0xdc, 0x29, 0xb0, 0x49, 0xea, 0xfc, 0x62, 0xc0, 0x8e,
|
||||
0xbc, 0x87, 0x47, 0x24, 0xf4, 0x83, 0x8b, 0xca, 0x4f, 0x04, 0x04, 0xf5, 0x35, 0x0d, 0x62, 0xf5,
|
||||
0x85, 0x20, 0xd6, 0xe8, 0x2e, 0xd4, 0xb9, 0x46, 0x61, 0xe1, 0xee, 0xb6, 0xff, 0xbc, 0xcc, 0x3c,
|
||||
0xbb, 0x88, 0x29, 0x16, 0x6c, 0xde, 0xda, 0xe5, 0xb7, 0x8e, 0x5d, 0xbf, 0xae, 0xb5, 0xcb, 0x38,
|
||||
0xdd, 0xda, 0x65, 0xc4, 0x7b, 0x0b, 0x80, 0x32, 0x1f, 0xea, 0x40, 0xeb, 0xc1, 0xb3, 0xf9, 0xd3,
|
||||
0xd9, 0x19, 0xb6, 0x6e, 0x21, 0x13, 0x1a, 0xe3, 0x93, 0xf9, 0xf8, 0xcc, 0x32, 0x38, 0x3e, 0x9d,
|
||||
0x4f, 0x26, 0x27, 0xf8, 0x85, 0x75, 0x9b, 0x6f, 0xe6, 0x4f, 0x67, 0x2f, 0x9e, 0x9f, 0x3d, 0xb4,
|
||||
0x6a, 0xa8, 0x0b, 0xe6, 0xe3, 0x2f, 0xa7, 0xb3, 0x67, 0x63, 0x7c, 0x32, 0xb1, 0xea, 0xe8, 0x0d,
|
||||
0xd8, 0x13, 0x31, 0x6e, 0x09, 0x36, 0x4e, 0x9d, 0x57, 0x97, 0x47, 0xc6, 0xef, 0x97, 0x47, 0xc6,
|
||||
0x5f, 0x97, 0x47, 0xc6, 0x37, 0x3d, 0x9f, 0xb9, 0xa5, 0x38, 0x57, 0x8a, 0x5b, 0x34, 0xc5, 0xcb,
|
||||
0xfe, 0xe8, 0xef, 0x00, 0x00, 0x00, 0xff, 0xff, 0x0d, 0x2e, 0x66, 0xc1, 0xcb, 0x09, 0x00, 0x00,
|
||||
}
|
||||
|
||||
func (m *LabelPair) Marshal() (dAtA []byte, err error) {
|
||||
|
|
|
@ -52,7 +52,7 @@ message Counter {
|
|||
double value = 1;
|
||||
Exemplar exemplar = 2;
|
||||
|
||||
google.protobuf.Timestamp created_timestamp = 3 [(gogoproto.nullable) = true];
|
||||
google.protobuf.Timestamp created_timestamp = 3;
|
||||
}
|
||||
|
||||
message Quantile {
|
||||
|
@ -65,7 +65,7 @@ message Summary {
|
|||
double sample_sum = 2;
|
||||
repeated Quantile quantile = 3 [(gogoproto.nullable) = false];
|
||||
|
||||
google.protobuf.Timestamp created_timestamp = 4 [(gogoproto.nullable) = true];
|
||||
google.protobuf.Timestamp created_timestamp = 4;
|
||||
}
|
||||
|
||||
message Untyped {
|
||||
|
@ -79,7 +79,7 @@ message Histogram {
|
|||
// Buckets for the conventional histogram.
|
||||
repeated Bucket bucket = 3 [(gogoproto.nullable) = false]; // Ordered in increasing order of upper_bound, +Inf bucket is optional.
|
||||
|
||||
google.protobuf.Timestamp created_timestamp = 15 [(gogoproto.nullable) = true];
|
||||
google.protobuf.Timestamp created_timestamp = 15;
|
||||
|
||||
// Everything below here is for native histograms (also known as sparse histograms).
|
||||
// Native histograms are an experimental feature without stability guarantees.
|
||||
|
|
|
@ -1225,10 +1225,11 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper)
|
|||
enh.Out = result[:0] // Reuse result vector.
|
||||
warnings.Merge(ws)
|
||||
|
||||
ev.currentSamples += len(result)
|
||||
vecNumSamples := result.TotalSamples()
|
||||
ev.currentSamples += vecNumSamples
|
||||
// When we reset currentSamples to tempNumSamples during the next iteration of the loop it also
|
||||
// needs to include the samples from the result here, as they're still in memory.
|
||||
tempNumSamples += len(result)
|
||||
tempNumSamples += vecNumSamples
|
||||
ev.samplesStats.UpdatePeak(ev.currentSamples)
|
||||
|
||||
if ev.currentSamples > ev.maxSamples {
|
||||
|
@ -1324,12 +1325,10 @@ func (ev *evaluator) evalSubquery(subq *parser.SubqueryExpr) (*parser.MatrixSele
|
|||
Range: subq.Range,
|
||||
VectorSelector: vs,
|
||||
}
|
||||
totalSamples := 0
|
||||
for _, s := range mat {
|
||||
totalSamples += len(s.Floats) + len(s.Histograms)
|
||||
vs.Series = append(vs.Series, NewStorageSeries(s))
|
||||
}
|
||||
return ms, totalSamples, ws
|
||||
return ms, mat.TotalSamples(), ws
|
||||
}
|
||||
|
||||
// eval evaluates the given expression as the given AST expression node requires.
|
||||
|
@ -1471,7 +1470,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio
|
|||
it := storage.NewBuffer(selRange)
|
||||
var chkIter chunkenc.Iterator
|
||||
for i, s := range selVS.Series {
|
||||
ev.currentSamples -= len(floats) + len(histograms)
|
||||
ev.currentSamples -= len(floats) + totalHPointSize(histograms)
|
||||
if floats != nil {
|
||||
floats = floats[:0]
|
||||
}
|
||||
|
@ -1515,7 +1514,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio
|
|||
// Make the function call.
|
||||
outVec, annos := call(inArgs, e.Args, enh)
|
||||
warnings.Merge(annos)
|
||||
ev.samplesStats.IncrementSamplesAtStep(step, int64(len(floats)+len(histograms)))
|
||||
ev.samplesStats.IncrementSamplesAtStep(step, int64(len(floats)+totalHPointSize(histograms)))
|
||||
|
||||
enh.Out = outVec[:0]
|
||||
if len(outVec) > 0 {
|
||||
|
@ -1534,10 +1533,11 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio
|
|||
// Only buffer stepRange milliseconds from the second step on.
|
||||
it.ReduceDelta(stepRange)
|
||||
}
|
||||
if len(ss.Floats)+len(ss.Histograms) > 0 {
|
||||
if ev.currentSamples+len(ss.Floats)+len(ss.Histograms) <= ev.maxSamples {
|
||||
histSamples := totalHPointSize(ss.Histograms)
|
||||
if len(ss.Floats)+histSamples > 0 {
|
||||
if ev.currentSamples+len(ss.Floats)+histSamples <= ev.maxSamples {
|
||||
mat = append(mat, ss)
|
||||
ev.currentSamples += len(ss.Floats) + len(ss.Histograms)
|
||||
ev.currentSamples += len(ss.Floats) + histSamples
|
||||
} else {
|
||||
ev.error(ErrTooManySamples(env))
|
||||
}
|
||||
|
@ -1546,7 +1546,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio
|
|||
}
|
||||
ev.samplesStats.UpdatePeak(ev.currentSamples)
|
||||
|
||||
ev.currentSamples -= len(floats) + len(histograms)
|
||||
ev.currentSamples -= len(floats) + totalHPointSize(histograms)
|
||||
putFPointSlice(floats)
|
||||
putHPointSlice(histograms)
|
||||
|
||||
|
@ -1693,14 +1693,18 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio
|
|||
ss.Floats = getFPointSlice(numSteps)
|
||||
}
|
||||
ss.Floats = append(ss.Floats, FPoint{F: f, T: ts})
|
||||
ev.currentSamples++
|
||||
ev.samplesStats.IncrementSamplesAtStep(step, 1)
|
||||
} else {
|
||||
if ss.Histograms == nil {
|
||||
ss.Histograms = getHPointSlice(numSteps)
|
||||
}
|
||||
ss.Histograms = append(ss.Histograms, HPoint{H: h, T: ts})
|
||||
point := HPoint{H: h, T: ts}
|
||||
ss.Histograms = append(ss.Histograms, point)
|
||||
histSize := point.size()
|
||||
ev.currentSamples += histSize
|
||||
ev.samplesStats.IncrementSamplesAtStep(step, int64(histSize))
|
||||
}
|
||||
ev.samplesStats.IncrementSamplesAtStep(step, 1)
|
||||
ev.currentSamples++
|
||||
} else {
|
||||
ev.error(ErrTooManySamples(env))
|
||||
}
|
||||
|
@ -1808,13 +1812,15 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio
|
|||
T: ts,
|
||||
F: mat[i].Floats[0].F,
|
||||
})
|
||||
ev.currentSamples++
|
||||
} else {
|
||||
mat[i].Histograms = append(mat[i].Histograms, HPoint{
|
||||
point := HPoint{
|
||||
T: ts,
|
||||
H: mat[i].Histograms[0].H,
|
||||
})
|
||||
}
|
||||
mat[i].Histograms = append(mat[i].Histograms, point)
|
||||
ev.currentSamples += point.size()
|
||||
}
|
||||
ev.currentSamples++
|
||||
if ev.currentSamples > ev.maxSamples {
|
||||
ev.error(ErrTooManySamples(env))
|
||||
}
|
||||
|
@ -1858,9 +1864,14 @@ func (ev *evaluator) rangeEvalTimestampFunctionOverVectorSelector(vs *parser.Vec
|
|||
F: f,
|
||||
H: h,
|
||||
})
|
||||
|
||||
histSize := 0
|
||||
if h != nil {
|
||||
histSize := h.Size() / 16 // 16 bytes per sample.
|
||||
ev.currentSamples += histSize
|
||||
}
|
||||
ev.currentSamples++
|
||||
ev.samplesStats.IncrementSamplesAtTimestamp(enh.Ts, 1)
|
||||
|
||||
ev.samplesStats.IncrementSamplesAtTimestamp(enh.Ts, int64(1+histSize))
|
||||
if ev.currentSamples > ev.maxSamples {
|
||||
ev.error(ErrTooManySamples(env))
|
||||
}
|
||||
|
@ -1982,10 +1993,10 @@ func (ev *evaluator) matrixSelector(node *parser.MatrixSelector) (Matrix, annota
|
|||
}
|
||||
|
||||
ss.Floats, ss.Histograms = ev.matrixIterSlice(it, mint, maxt, nil, nil)
|
||||
totalLen := int64(len(ss.Floats)) + int64(len(ss.Histograms))
|
||||
ev.samplesStats.IncrementSamplesAtTimestamp(ev.startTimestamp, totalLen)
|
||||
totalSize := int64(len(ss.Floats)) + int64(totalHPointSize(ss.Histograms))
|
||||
ev.samplesStats.IncrementSamplesAtTimestamp(ev.startTimestamp, totalSize)
|
||||
|
||||
if totalLen > 0 {
|
||||
if totalSize > 0 {
|
||||
matrix = append(matrix, ss)
|
||||
} else {
|
||||
putFPointSlice(ss.Floats)
|
||||
|
@ -2041,13 +2052,13 @@ func (ev *evaluator) matrixIterSlice(
|
|||
var drop int
|
||||
for drop = 0; histograms[drop].T < mint; drop++ { // nolint:revive
|
||||
}
|
||||
ev.currentSamples -= drop
|
||||
copy(histograms, histograms[drop:])
|
||||
histograms = histograms[:len(histograms)-drop]
|
||||
ev.currentSamples -= totalHPointSize(histograms)
|
||||
// Only append points with timestamps after the last timestamp we have.
|
||||
mintHistograms = histograms[len(histograms)-1].T + 1
|
||||
} else {
|
||||
ev.currentSamples -= len(histograms)
|
||||
ev.currentSamples -= totalHPointSize(histograms)
|
||||
if histograms != nil {
|
||||
histograms = histograms[:0]
|
||||
}
|
||||
|
@ -2076,11 +2087,12 @@ loop:
|
|||
if ev.currentSamples >= ev.maxSamples {
|
||||
ev.error(ErrTooManySamples(env))
|
||||
}
|
||||
ev.currentSamples++
|
||||
point := HPoint{T: t, H: h}
|
||||
if histograms == nil {
|
||||
histograms = getHPointSlice(16)
|
||||
}
|
||||
histograms = append(histograms, HPoint{T: t, H: h})
|
||||
histograms = append(histograms, point)
|
||||
ev.currentSamples += point.size()
|
||||
}
|
||||
case chunkenc.ValFloat:
|
||||
t, f := buf.At()
|
||||
|
@ -2111,8 +2123,9 @@ loop:
|
|||
if histograms == nil {
|
||||
histograms = getHPointSlice(16)
|
||||
}
|
||||
histograms = append(histograms, HPoint{T: t, H: h})
|
||||
ev.currentSamples++
|
||||
point := HPoint{T: t, H: h}
|
||||
histograms = append(histograms, point)
|
||||
ev.currentSamples += point.size()
|
||||
}
|
||||
case chunkenc.ValFloat:
|
||||
t, f := it.At()
|
||||
|
|
|
@ -168,6 +168,23 @@ func (p HPoint) MarshalJSON() ([]byte, error) {
|
|||
return json.Marshal([...]interface{}{float64(p.T) / 1000, h})
|
||||
}
|
||||
|
||||
// size returns the size of the HPoint compared to the size of an FPoint.
|
||||
// The total size is calculated considering the histogram timestamp (p.T - 8 bytes),
|
||||
// and then a number of bytes in the histogram.
|
||||
// This sum is divided by 16, as samples are 16 bytes.
|
||||
func (p HPoint) size() int {
|
||||
return (p.H.Size() + 8) / 16
|
||||
}
|
||||
|
||||
// totalHPointSize returns the total number of samples in the given slice of HPoints.
|
||||
func totalHPointSize(histograms []HPoint) int {
|
||||
var total int
|
||||
for _, h := range histograms {
|
||||
total += h.size()
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
// Sample is a single sample belonging to a metric. It represents either a float
|
||||
// sample or a histogram sample. If H is nil, it is a float sample. Otherwise,
|
||||
// it is a histogram sample.
|
||||
|
@ -226,6 +243,21 @@ func (vec Vector) String() string {
|
|||
return strings.Join(entries, "\n")
|
||||
}
|
||||
|
||||
// TotalSamples returns the total number of samples in the series within a vector.
|
||||
// Float samples have a weight of 1 in this number, while histogram samples have a higher
|
||||
// weight according to their size compared with the size of a float sample.
|
||||
// See HPoint.size for details.
|
||||
func (vec Vector) TotalSamples() int {
|
||||
numSamples := 0
|
||||
for _, sample := range vec {
|
||||
numSamples++
|
||||
if sample.H != nil {
|
||||
numSamples += sample.H.Size() / 16
|
||||
}
|
||||
}
|
||||
return numSamples
|
||||
}
|
||||
|
||||
// ContainsSameLabelset checks if a vector has samples with the same labelset
|
||||
// Such a behavior is semantically undefined
|
||||
// https://github.com/prometheus/prometheus/issues/4562
|
||||
|
@ -264,10 +296,13 @@ func (m Matrix) String() string {
|
|||
}
|
||||
|
||||
// TotalSamples returns the total number of samples in the series within a matrix.
|
||||
// Float samples have a weight of 1 in this number, while histogram samples have a higher
|
||||
// weight according to their size compared with the size of a float sample.
|
||||
// See HPoint.size for details.
|
||||
func (m Matrix) TotalSamples() int {
|
||||
numSamples := 0
|
||||
for _, series := range m {
|
||||
numSamples += len(series.Floats) + len(series.Histograms)
|
||||
numSamples += len(series.Floats) + totalHPointSize(series.Histograms)
|
||||
}
|
||||
return numSamples
|
||||
}
|
||||
|
|
923
rules/group.go
Normal file
923
rules/group.go
Normal file
|
@ -0,0 +1,923 @@
|
|||
// Copyright 2013 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package rules
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"math"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"golang.org/x/exp/slices"
|
||||
|
||||
"github.com/go-kit/log"
|
||||
"github.com/go-kit/log/level"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/model"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
|
||||
"github.com/prometheus/prometheus/model/labels"
|
||||
"github.com/prometheus/prometheus/model/timestamp"
|
||||
"github.com/prometheus/prometheus/model/value"
|
||||
"github.com/prometheus/prometheus/promql"
|
||||
"github.com/prometheus/prometheus/storage"
|
||||
"github.com/prometheus/prometheus/tsdb/chunkenc"
|
||||
)
|
||||
|
||||
// Group is a set of rules that have a logical relation.
|
||||
type Group struct {
|
||||
name string
|
||||
file string
|
||||
interval time.Duration
|
||||
evaluationDelay *time.Duration
|
||||
limit int
|
||||
rules []Rule
|
||||
sourceTenants []string
|
||||
seriesInPreviousEval []map[string]labels.Labels // One per Rule.
|
||||
staleSeries []labels.Labels
|
||||
opts *ManagerOptions
|
||||
mtx sync.Mutex
|
||||
evaluationTime time.Duration
|
||||
lastEvaluation time.Time // Wall-clock time of most recent evaluation.
|
||||
lastEvalTimestamp time.Time // Time slot used for most recent evaluation.
|
||||
|
||||
shouldRestore bool
|
||||
|
||||
markStale bool
|
||||
done chan struct{}
|
||||
terminated chan struct{}
|
||||
managerDone chan struct{}
|
||||
|
||||
logger log.Logger
|
||||
|
||||
metrics *Metrics
|
||||
|
||||
// Rule group evaluation iteration function,
|
||||
// defaults to DefaultEvalIterationFunc.
|
||||
evalIterationFunc GroupEvalIterationFunc
|
||||
|
||||
alignEvaluationTimeOnInterval bool
|
||||
}
|
||||
|
||||
// GroupEvalIterationFunc is used to implement and extend rule group
|
||||
// evaluation iteration logic. It is configured in Group.evalIterationFunc,
|
||||
// and periodically invoked at each group evaluation interval to
|
||||
// evaluate the rules in the group at that point in time.
|
||||
// DefaultEvalIterationFunc is the default implementation.
|
||||
type GroupEvalIterationFunc func(ctx context.Context, g *Group, evalTimestamp time.Time)
|
||||
|
||||
type GroupOptions struct {
|
||||
Name, File string
|
||||
Interval time.Duration
|
||||
Limit int
|
||||
Rules []Rule
|
||||
SourceTenants []string
|
||||
ShouldRestore bool
|
||||
Opts *ManagerOptions
|
||||
EvaluationDelay *time.Duration
|
||||
done chan struct{}
|
||||
EvalIterationFunc GroupEvalIterationFunc
|
||||
AlignEvaluationTimeOnInterval bool
|
||||
}
|
||||
|
||||
// NewGroup makes a new Group with the given name, options, and rules.
|
||||
func NewGroup(o GroupOptions) *Group {
|
||||
metrics := o.Opts.Metrics
|
||||
if metrics == nil {
|
||||
metrics = NewGroupMetrics(o.Opts.Registerer)
|
||||
}
|
||||
|
||||
key := GroupKey(o.File, o.Name)
|
||||
metrics.IterationsMissed.WithLabelValues(key)
|
||||
metrics.IterationsScheduled.WithLabelValues(key)
|
||||
metrics.EvalTotal.WithLabelValues(key)
|
||||
metrics.EvalFailures.WithLabelValues(key)
|
||||
metrics.GroupLastEvalTime.WithLabelValues(key)
|
||||
metrics.GroupLastDuration.WithLabelValues(key)
|
||||
metrics.GroupRules.WithLabelValues(key).Set(float64(len(o.Rules)))
|
||||
metrics.GroupSamples.WithLabelValues(key)
|
||||
metrics.GroupInterval.WithLabelValues(key).Set(o.Interval.Seconds())
|
||||
|
||||
evalIterationFunc := o.EvalIterationFunc
|
||||
if evalIterationFunc == nil {
|
||||
evalIterationFunc = DefaultEvalIterationFunc
|
||||
}
|
||||
|
||||
return &Group{
|
||||
name: o.Name,
|
||||
file: o.File,
|
||||
interval: o.Interval,
|
||||
evaluationDelay: o.EvaluationDelay,
|
||||
limit: o.Limit,
|
||||
rules: o.Rules,
|
||||
shouldRestore: o.ShouldRestore,
|
||||
opts: o.Opts,
|
||||
sourceTenants: o.SourceTenants,
|
||||
seriesInPreviousEval: make([]map[string]labels.Labels, len(o.Rules)),
|
||||
done: make(chan struct{}),
|
||||
managerDone: o.done,
|
||||
terminated: make(chan struct{}),
|
||||
logger: log.With(o.Opts.Logger, "file", o.File, "group", o.Name),
|
||||
metrics: metrics,
|
||||
evalIterationFunc: evalIterationFunc,
|
||||
alignEvaluationTimeOnInterval: o.AlignEvaluationTimeOnInterval,
|
||||
}
|
||||
}
|
||||
|
||||
// Name returns the group name.
|
||||
func (g *Group) Name() string { return g.name }
|
||||
|
||||
// File returns the group's file.
|
||||
func (g *Group) File() string { return g.file }
|
||||
|
||||
// Rules returns the group's rules.
|
||||
func (g *Group) Rules() []Rule { return g.rules }
|
||||
|
||||
// Queryable returns the group's querable.
|
||||
func (g *Group) Queryable() storage.Queryable { return g.opts.Queryable }
|
||||
|
||||
// Context returns the group's context.
|
||||
func (g *Group) Context() context.Context { return g.opts.Context }
|
||||
|
||||
// Interval returns the group's interval.
|
||||
func (g *Group) Interval() time.Duration { return g.interval }
|
||||
|
||||
// Limit returns the group's limit.
|
||||
func (g *Group) Limit() int { return g.limit }
|
||||
|
||||
// SourceTenants returns the source tenants for the group.
|
||||
// If it's empty or nil, then the owning user/tenant is considered to be the source tenant.
|
||||
func (g *Group) SourceTenants() []string { return g.sourceTenants }
|
||||
|
||||
// Logger returns the group's logger, which NewGroup derives from the manager
// logger by attaching the "file" and "group" key/value pairs.
func (g *Group) Logger() log.Logger {
	return g.logger
}
|
||||
|
||||
func (g *Group) run(ctx context.Context) {
|
||||
defer close(g.terminated)
|
||||
|
||||
// Wait an initial amount to have consistently slotted intervals.
|
||||
evalTimestamp := g.EvalTimestamp(time.Now().UnixNano()).Add(g.interval)
|
||||
select {
|
||||
case <-time.After(time.Until(evalTimestamp)):
|
||||
case <-g.done:
|
||||
return
|
||||
}
|
||||
|
||||
ctx = promql.NewOriginContext(ctx, map[string]interface{}{
|
||||
"ruleGroup": map[string]string{
|
||||
"file": g.File(),
|
||||
"name": g.Name(),
|
||||
},
|
||||
})
|
||||
|
||||
// The assumption here is that since the ticker was started after having
|
||||
// waited for `evalTimestamp` to pass, the ticks will trigger soon
|
||||
// after each `evalTimestamp + N * g.interval` occurrence.
|
||||
tick := time.NewTicker(g.interval)
|
||||
defer tick.Stop()
|
||||
|
||||
defer func() {
|
||||
if !g.markStale {
|
||||
return
|
||||
}
|
||||
go func(now time.Time) {
|
||||
for _, rule := range g.seriesInPreviousEval {
|
||||
for _, r := range rule {
|
||||
g.staleSeries = append(g.staleSeries, r)
|
||||
}
|
||||
}
|
||||
// That can be garbage collected at this point.
|
||||
g.seriesInPreviousEval = nil
|
||||
// Wait for 2 intervals to give the opportunity to renamed rules
|
||||
// to insert new series in the tsdb. At this point if there is a
|
||||
// renamed rule, it should already be started.
|
||||
select {
|
||||
case <-g.managerDone:
|
||||
case <-time.After(2 * g.interval):
|
||||
g.cleanupStaleSeries(ctx, now)
|
||||
}
|
||||
}(time.Now())
|
||||
}()
|
||||
|
||||
g.evalIterationFunc(ctx, g, evalTimestamp)
|
||||
if g.shouldRestore {
|
||||
// If we have to restore, we wait for another Eval to finish.
|
||||
// The reason behind this is, during first eval (or before it)
|
||||
// we might not have enough data scraped, and recording rules would not
|
||||
// have updated the latest values, on which some alerts might depend.
|
||||
select {
|
||||
case <-g.done:
|
||||
return
|
||||
case <-tick.C:
|
||||
missed := (time.Since(evalTimestamp) / g.interval) - 1
|
||||
if missed > 0 {
|
||||
g.metrics.IterationsMissed.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed))
|
||||
g.metrics.IterationsScheduled.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed))
|
||||
}
|
||||
evalTimestamp = evalTimestamp.Add((missed + 1) * g.interval)
|
||||
g.evalIterationFunc(ctx, g, evalTimestamp)
|
||||
}
|
||||
|
||||
g.RestoreForState(time.Now())
|
||||
g.shouldRestore = false
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-g.done:
|
||||
return
|
||||
default:
|
||||
select {
|
||||
case <-g.done:
|
||||
return
|
||||
case <-tick.C:
|
||||
missed := (time.Since(evalTimestamp) / g.interval) - 1
|
||||
if missed > 0 {
|
||||
g.metrics.IterationsMissed.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed))
|
||||
g.metrics.IterationsScheduled.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed))
|
||||
}
|
||||
evalTimestamp = evalTimestamp.Add((missed + 1) * g.interval)
|
||||
|
||||
g.evalIterationFunc(ctx, g, evalTimestamp)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (g *Group) stop() {
|
||||
close(g.done)
|
||||
<-g.terminated
|
||||
}
|
||||
|
||||
func (g *Group) hash() uint64 {
|
||||
l := labels.New(
|
||||
labels.Label{Name: "name", Value: g.name},
|
||||
labels.Label{Name: "file", Value: g.file},
|
||||
)
|
||||
return l.Hash()
|
||||
}
|
||||
|
||||
// AlertingRules returns the list of the group's alerting rules.
|
||||
func (g *Group) AlertingRules() []*AlertingRule {
|
||||
g.mtx.Lock()
|
||||
defer g.mtx.Unlock()
|
||||
|
||||
var alerts []*AlertingRule
|
||||
for _, rule := range g.rules {
|
||||
if alertingRule, ok := rule.(*AlertingRule); ok {
|
||||
alerts = append(alerts, alertingRule)
|
||||
}
|
||||
}
|
||||
slices.SortFunc(alerts, func(a, b *AlertingRule) int {
|
||||
if a.State() == b.State() {
|
||||
return strings.Compare(a.Name(), b.Name())
|
||||
}
|
||||
return int(b.State() - a.State())
|
||||
})
|
||||
return alerts
|
||||
}
|
||||
|
||||
// HasAlertingRules returns true if the group contains at least one AlertingRule.
|
||||
func (g *Group) HasAlertingRules() bool {
|
||||
g.mtx.Lock()
|
||||
defer g.mtx.Unlock()
|
||||
|
||||
for _, rule := range g.rules {
|
||||
if _, ok := rule.(*AlertingRule); ok {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// GetEvaluationTime returns the time in seconds it took to evaluate the rule group.
|
||||
func (g *Group) GetEvaluationTime() time.Duration {
|
||||
g.mtx.Lock()
|
||||
defer g.mtx.Unlock()
|
||||
return g.evaluationTime
|
||||
}
|
||||
|
||||
// setEvaluationTime sets the time in seconds the last evaluation took.
|
||||
func (g *Group) setEvaluationTime(dur time.Duration) {
|
||||
g.metrics.GroupLastDuration.WithLabelValues(GroupKey(g.file, g.name)).Set(dur.Seconds())
|
||||
|
||||
g.mtx.Lock()
|
||||
defer g.mtx.Unlock()
|
||||
g.evaluationTime = dur
|
||||
}
|
||||
|
||||
// GetLastEvaluation returns the time the last evaluation of the rule group took place.
|
||||
func (g *Group) GetLastEvaluation() time.Time {
|
||||
g.mtx.Lock()
|
||||
defer g.mtx.Unlock()
|
||||
return g.lastEvaluation
|
||||
}
|
||||
|
||||
// setLastEvaluation updates evaluationTimestamp to the timestamp of when the rule group was last evaluated.
|
||||
func (g *Group) setLastEvaluation(ts time.Time) {
|
||||
g.metrics.GroupLastEvalTime.WithLabelValues(GroupKey(g.file, g.name)).Set(float64(ts.UnixNano()) / 1e9)
|
||||
|
||||
g.mtx.Lock()
|
||||
defer g.mtx.Unlock()
|
||||
g.lastEvaluation = ts
|
||||
}
|
||||
|
||||
// GetLastEvalTimestamp returns the timestamp of the last evaluation.
|
||||
func (g *Group) GetLastEvalTimestamp() time.Time {
|
||||
g.mtx.Lock()
|
||||
defer g.mtx.Unlock()
|
||||
return g.lastEvalTimestamp
|
||||
}
|
||||
|
||||
// setLastEvalTimestamp updates lastEvalTimestamp to the timestamp of the last evaluation.
|
||||
func (g *Group) setLastEvalTimestamp(ts time.Time) {
|
||||
g.mtx.Lock()
|
||||
defer g.mtx.Unlock()
|
||||
g.lastEvalTimestamp = ts
|
||||
}
|
||||
|
||||
// EvalTimestamp returns the immediately preceding consistently slotted evaluation time.
|
||||
func (g *Group) EvalTimestamp(startTime int64) time.Time {
|
||||
var offset int64
|
||||
if !g.alignEvaluationTimeOnInterval {
|
||||
offset = int64(g.hash() % uint64(g.interval))
|
||||
}
|
||||
var (
|
||||
// This group's evaluation times differ from the perfect time intervals by `offset` nanoseconds.
|
||||
// But we can only use `% interval` to align with the interval. And `% interval` will always
|
||||
// align with the perfect time intervals, instead of this group's. Because of this we add
|
||||
// `offset` _after_ aligning with the perfect time interval.
|
||||
//
|
||||
// There can be cases where adding `offset` to the perfect evaluation time can yield a
|
||||
// timestamp in the future, which is not what EvalTimestamp should do.
|
||||
// So we subtract one `offset` to make sure that `now - (now % interval) + offset` gives an
|
||||
// evaluation time in the past.
|
||||
adjNow = startTime - offset
|
||||
|
||||
// Adjust to perfect evaluation intervals.
|
||||
base = adjNow - (adjNow % int64(g.interval))
|
||||
|
||||
// Add one offset to randomize the evaluation times of this group.
|
||||
next = base + offset
|
||||
)
|
||||
|
||||
return time.Unix(0, next).UTC()
|
||||
}
|
||||
|
||||
func nameAndLabels(rule Rule) string {
|
||||
return rule.Name() + rule.Labels().String()
|
||||
}
|
||||
|
||||
// CopyState copies the alerting rule and staleness related state from the given group.
|
||||
//
|
||||
// Rules are matched based on their name and labels. If there are duplicates, the
|
||||
// first is matched with the first, second with the second etc.
|
||||
func (g *Group) CopyState(from *Group) {
|
||||
g.evaluationTime = from.evaluationTime
|
||||
g.lastEvaluation = from.lastEvaluation
|
||||
|
||||
ruleMap := make(map[string][]int, len(from.rules))
|
||||
|
||||
for fi, fromRule := range from.rules {
|
||||
nameAndLabels := nameAndLabels(fromRule)
|
||||
l := ruleMap[nameAndLabels]
|
||||
ruleMap[nameAndLabels] = append(l, fi)
|
||||
}
|
||||
|
||||
for i, rule := range g.rules {
|
||||
nameAndLabels := nameAndLabels(rule)
|
||||
indexes := ruleMap[nameAndLabels]
|
||||
if len(indexes) == 0 {
|
||||
continue
|
||||
}
|
||||
fi := indexes[0]
|
||||
g.seriesInPreviousEval[i] = from.seriesInPreviousEval[fi]
|
||||
ruleMap[nameAndLabels] = indexes[1:]
|
||||
|
||||
ar, ok := rule.(*AlertingRule)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
far, ok := from.rules[fi].(*AlertingRule)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
for fp, a := range far.active {
|
||||
ar.active[fp] = a
|
||||
}
|
||||
}
|
||||
|
||||
// Handle deleted and unmatched duplicate rules.
|
||||
g.staleSeries = from.staleSeries
|
||||
for fi, fromRule := range from.rules {
|
||||
nameAndLabels := nameAndLabels(fromRule)
|
||||
l := ruleMap[nameAndLabels]
|
||||
if len(l) != 0 {
|
||||
for _, series := range from.seriesInPreviousEval[fi] {
|
||||
g.staleSeries = append(g.staleSeries, series)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Eval runs a single evaluation cycle in which all rules are evaluated sequentially.
|
||||
func (g *Group) Eval(ctx context.Context, ts time.Time) {
|
||||
var samplesTotal float64
|
||||
evaluationDelay := g.EvaluationDelay()
|
||||
for i, rule := range g.rules {
|
||||
select {
|
||||
case <-g.done:
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
func(i int, rule Rule) {
|
||||
logger := log.WithPrefix(g.logger, "name", rule.Name(), "index", i)
|
||||
ctx, sp := otel.Tracer("").Start(ctx, "rule")
|
||||
sp.SetAttributes(attribute.String("name", rule.Name()))
|
||||
defer func(t time.Time) {
|
||||
sp.End()
|
||||
|
||||
since := time.Since(t)
|
||||
g.metrics.EvalDuration.Observe(since.Seconds())
|
||||
rule.SetEvaluationDuration(since)
|
||||
rule.SetEvaluationTimestamp(t)
|
||||
}(time.Now())
|
||||
|
||||
if sp.SpanContext().IsSampled() && sp.SpanContext().HasTraceID() {
|
||||
logger = log.WithPrefix(g.logger, "traceID", sp.SpanContext().TraceID())
|
||||
}
|
||||
|
||||
g.metrics.EvalTotal.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
|
||||
|
||||
vector, err := rule.Eval(ctx, evaluationDelay, ts, g.opts.QueryFunc, g.opts.ExternalURL, g.Limit())
|
||||
if err != nil {
|
||||
rule.SetHealth(HealthBad)
|
||||
rule.SetLastError(err)
|
||||
sp.SetStatus(codes.Error, err.Error())
|
||||
g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
|
||||
|
||||
// Canceled queries are intentional termination of queries. This normally
|
||||
// happens on shutdown and thus we skip logging of any errors here.
|
||||
var eqc promql.ErrQueryCanceled
|
||||
if !errors.As(err, &eqc) {
|
||||
level.Warn(logger).Log("msg", "Evaluating rule failed", "rule", rule, "err", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
rule.SetHealth(HealthGood)
|
||||
rule.SetLastError(nil)
|
||||
samplesTotal += float64(len(vector))
|
||||
|
||||
if ar, ok := rule.(*AlertingRule); ok {
|
||||
ar.sendAlerts(ctx, ts, g.opts.ResendDelay, g.interval, g.opts.NotifyFunc)
|
||||
}
|
||||
var (
|
||||
numOutOfOrder = 0
|
||||
numTooOld = 0
|
||||
numDuplicates = 0
|
||||
)
|
||||
|
||||
app := g.opts.Appendable.Appender(ctx)
|
||||
seriesReturned := make(map[string]labels.Labels, len(g.seriesInPreviousEval[i]))
|
||||
defer func() {
|
||||
if err := app.Commit(); err != nil {
|
||||
rule.SetHealth(HealthBad)
|
||||
rule.SetLastError(err)
|
||||
sp.SetStatus(codes.Error, err.Error())
|
||||
g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
|
||||
|
||||
level.Warn(logger).Log("msg", "Rule sample appending failed", "err", err)
|
||||
return
|
||||
}
|
||||
g.seriesInPreviousEval[i] = seriesReturned
|
||||
}()
|
||||
|
||||
for _, s := range vector {
|
||||
if s.H != nil {
|
||||
_, err = app.AppendHistogram(0, s.Metric, s.T, nil, s.H)
|
||||
} else {
|
||||
_, err = app.Append(0, s.Metric, s.T, s.F)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
rule.SetHealth(HealthBad)
|
||||
rule.SetLastError(err)
|
||||
sp.SetStatus(codes.Error, err.Error())
|
||||
unwrappedErr := errors.Unwrap(err)
|
||||
if unwrappedErr == nil {
|
||||
unwrappedErr = err
|
||||
}
|
||||
switch {
|
||||
case errors.Is(unwrappedErr, storage.ErrOutOfOrderSample):
|
||||
numOutOfOrder++
|
||||
level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
|
||||
case errors.Is(unwrappedErr, storage.ErrTooOldSample):
|
||||
numTooOld++
|
||||
level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
|
||||
case errors.Is(unwrappedErr, storage.ErrDuplicateSampleForTimestamp):
|
||||
numDuplicates++
|
||||
level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
|
||||
default:
|
||||
level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
|
||||
}
|
||||
} else {
|
||||
buf := [1024]byte{}
|
||||
seriesReturned[string(s.Metric.Bytes(buf[:]))] = s.Metric
|
||||
}
|
||||
}
|
||||
if numOutOfOrder > 0 {
|
||||
level.Warn(logger).Log("msg", "Error on ingesting out-of-order result from rule evaluation", "numDropped", numOutOfOrder)
|
||||
}
|
||||
if numTooOld > 0 {
|
||||
level.Warn(logger).Log("msg", "Error on ingesting too old result from rule evaluation", "numDropped", numTooOld)
|
||||
}
|
||||
if numDuplicates > 0 {
|
||||
level.Warn(logger).Log("msg", "Error on ingesting results from rule evaluation with different value but same timestamp", "numDropped", numDuplicates)
|
||||
}
|
||||
|
||||
for metric, lset := range g.seriesInPreviousEval[i] {
|
||||
if _, ok := seriesReturned[metric]; !ok {
|
||||
// Series no longer exposed, mark it stale.
|
||||
_, err = app.Append(0, lset, timestamp.FromTime(ts.Add(-evaluationDelay)), math.Float64frombits(value.StaleNaN))
|
||||
unwrappedErr := errors.Unwrap(err)
|
||||
if unwrappedErr == nil {
|
||||
unwrappedErr = err
|
||||
}
|
||||
switch {
|
||||
case unwrappedErr == nil:
|
||||
case errors.Is(unwrappedErr, storage.ErrOutOfOrderSample),
|
||||
errors.Is(unwrappedErr, storage.ErrTooOldSample),
|
||||
errors.Is(unwrappedErr, storage.ErrDuplicateSampleForTimestamp):
|
||||
// Do not count these in logging, as this is expected if series
|
||||
// is exposed from a different rule.
|
||||
default:
|
||||
level.Warn(logger).Log("msg", "Adding stale sample failed", "sample", lset.String(), "err", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}(i, rule)
|
||||
}
|
||||
if g.metrics != nil {
|
||||
g.metrics.GroupSamples.WithLabelValues(GroupKey(g.File(), g.Name())).Set(samplesTotal)
|
||||
}
|
||||
g.cleanupStaleSeries(ctx, ts)
|
||||
}
|
||||
|
||||
func (g *Group) EvaluationDelay() time.Duration {
|
||||
if g.evaluationDelay != nil {
|
||||
return *g.evaluationDelay
|
||||
}
|
||||
if g.opts.DefaultEvaluationDelay != nil {
|
||||
return g.opts.DefaultEvaluationDelay()
|
||||
}
|
||||
return time.Duration(0)
|
||||
}
|
||||
|
||||
func (g *Group) cleanupStaleSeries(ctx context.Context, ts time.Time) {
|
||||
if len(g.staleSeries) == 0 {
|
||||
return
|
||||
}
|
||||
app := g.opts.Appendable.Appender(ctx)
|
||||
evaluationDelay := g.EvaluationDelay()
|
||||
for _, s := range g.staleSeries {
|
||||
// Rule that produced series no longer configured, mark it stale.
|
||||
_, err := app.Append(0, s, timestamp.FromTime(ts.Add(-evaluationDelay)), math.Float64frombits(value.StaleNaN))
|
||||
unwrappedErr := errors.Unwrap(err)
|
||||
if unwrappedErr == nil {
|
||||
unwrappedErr = err
|
||||
}
|
||||
switch {
|
||||
case unwrappedErr == nil:
|
||||
case errors.Is(unwrappedErr, storage.ErrOutOfOrderSample),
|
||||
errors.Is(unwrappedErr, storage.ErrTooOldSample),
|
||||
errors.Is(unwrappedErr, storage.ErrDuplicateSampleForTimestamp):
|
||||
// Do not count these in logging, as this is expected if series
|
||||
// is exposed from a different rule.
|
||||
default:
|
||||
level.Warn(g.logger).Log("msg", "Adding stale sample for previous configuration failed", "sample", s, "err", err)
|
||||
}
|
||||
}
|
||||
if err := app.Commit(); err != nil {
|
||||
level.Warn(g.logger).Log("msg", "Stale sample appending for previous configuration failed", "err", err)
|
||||
} else {
|
||||
g.staleSeries = nil
|
||||
}
|
||||
}
|
||||
|
||||
// RestoreForState restores the 'for' state of the alerts
|
||||
// by looking up last ActiveAt from storage.
|
||||
func (g *Group) RestoreForState(ts time.Time) {
|
||||
maxtMS := int64(model.TimeFromUnixNano(ts.UnixNano()))
|
||||
// We allow restoration only if alerts were active before after certain time.
|
||||
mint := ts.Add(-g.opts.OutageTolerance)
|
||||
mintMS := int64(model.TimeFromUnixNano(mint.UnixNano()))
|
||||
q, err := g.opts.Queryable.Querier(mintMS, maxtMS)
|
||||
if err != nil {
|
||||
level.Error(g.logger).Log("msg", "Failed to get Querier", "err", err)
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if err := q.Close(); err != nil {
|
||||
level.Error(g.logger).Log("msg", "Failed to close Querier", "err", err)
|
||||
}
|
||||
}()
|
||||
|
||||
for _, rule := range g.Rules() {
|
||||
alertRule, ok := rule.(*AlertingRule)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
alertHoldDuration := alertRule.HoldDuration()
|
||||
if alertHoldDuration < g.opts.ForGracePeriod {
|
||||
// If alertHoldDuration is already less than grace period, we would not
|
||||
// like to make it wait for `g.opts.ForGracePeriod` time before firing.
|
||||
// Hence we skip restoration, which will make it wait for alertHoldDuration.
|
||||
alertRule.SetRestored(true)
|
||||
continue
|
||||
}
|
||||
|
||||
alertRule.ForEachActiveAlert(func(a *Alert) {
|
||||
var s storage.Series
|
||||
|
||||
s, err := alertRule.QueryforStateSeries(g.opts.Context, a, q)
|
||||
if err != nil {
|
||||
// Querier Warnings are ignored. We do not care unless we have an error.
|
||||
level.Error(g.logger).Log(
|
||||
"msg", "Failed to restore 'for' state",
|
||||
labels.AlertName, alertRule.Name(),
|
||||
"stage", "Select",
|
||||
"err", err,
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
if s == nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Series found for the 'for' state.
|
||||
var t int64
|
||||
var v float64
|
||||
it := s.Iterator(nil)
|
||||
for it.Next() == chunkenc.ValFloat {
|
||||
t, v = it.At()
|
||||
}
|
||||
if it.Err() != nil {
|
||||
level.Error(g.logger).Log("msg", "Failed to restore 'for' state",
|
||||
labels.AlertName, alertRule.Name(), "stage", "Iterator", "err", it.Err())
|
||||
return
|
||||
}
|
||||
if value.IsStaleNaN(v) { // Alert was not active.
|
||||
return
|
||||
}
|
||||
|
||||
downAt := time.Unix(t/1000, 0).UTC()
|
||||
restoredActiveAt := time.Unix(int64(v), 0).UTC()
|
||||
timeSpentPending := downAt.Sub(restoredActiveAt)
|
||||
timeRemainingPending := alertHoldDuration - timeSpentPending
|
||||
|
||||
switch {
|
||||
case timeRemainingPending <= 0:
|
||||
// It means that alert was firing when prometheus went down.
|
||||
// In the next Eval, the state of this alert will be set back to
|
||||
// firing again if it's still firing in that Eval.
|
||||
// Nothing to be done in this case.
|
||||
case timeRemainingPending < g.opts.ForGracePeriod:
|
||||
// (new) restoredActiveAt = (ts + m.opts.ForGracePeriod) - alertHoldDuration
|
||||
// /* new firing time */ /* moving back by hold duration */
|
||||
//
|
||||
// Proof of correctness:
|
||||
// firingTime = restoredActiveAt.Add(alertHoldDuration)
|
||||
// = ts + m.opts.ForGracePeriod - alertHoldDuration + alertHoldDuration
|
||||
// = ts + m.opts.ForGracePeriod
|
||||
//
|
||||
// Time remaining to fire = firingTime.Sub(ts)
|
||||
// = (ts + m.opts.ForGracePeriod) - ts
|
||||
// = m.opts.ForGracePeriod
|
||||
restoredActiveAt = ts.Add(g.opts.ForGracePeriod).Add(-alertHoldDuration)
|
||||
default:
|
||||
// By shifting ActiveAt to the future (ActiveAt + some_duration),
|
||||
// the total pending time from the original ActiveAt
|
||||
// would be `alertHoldDuration + some_duration`.
|
||||
// Here, some_duration = downDuration.
|
||||
downDuration := ts.Sub(downAt)
|
||||
restoredActiveAt = restoredActiveAt.Add(downDuration)
|
||||
}
|
||||
|
||||
a.ActiveAt = restoredActiveAt
|
||||
level.Debug(g.logger).Log("msg", "'for' state restored",
|
||||
labels.AlertName, alertRule.Name(), "restored_time", a.ActiveAt.Format(time.RFC850),
|
||||
"labels", a.Labels.String())
|
||||
})
|
||||
|
||||
alertRule.SetRestored(true)
|
||||
}
|
||||
}
|
||||
|
||||
// Equals return if two groups are the same.
|
||||
func (g *Group) Equals(ng *Group) bool {
|
||||
if g.name != ng.name {
|
||||
return false
|
||||
}
|
||||
|
||||
if g.file != ng.file {
|
||||
return false
|
||||
}
|
||||
|
||||
if g.interval != ng.interval {
|
||||
return false
|
||||
}
|
||||
|
||||
if g.limit != ng.limit {
|
||||
return false
|
||||
}
|
||||
|
||||
if len(g.rules) != len(ng.rules) {
|
||||
return false
|
||||
}
|
||||
|
||||
if g.alignEvaluationTimeOnInterval != ng.alignEvaluationTimeOnInterval {
|
||||
return false
|
||||
}
|
||||
|
||||
for i, gr := range g.rules {
|
||||
if gr.String() != ng.rules[i].String() {
|
||||
return false
|
||||
}
|
||||
}
|
||||
{
|
||||
// compare source tenants
|
||||
if len(g.sourceTenants) != len(ng.sourceTenants) {
|
||||
return false
|
||||
}
|
||||
|
||||
copyAndSort := func(x []string) []string {
|
||||
copied := make([]string, len(x))
|
||||
copy(copied, x)
|
||||
sort.Strings(copied)
|
||||
return copied
|
||||
}
|
||||
|
||||
ngSourceTenantsCopy := copyAndSort(ng.sourceTenants)
|
||||
gSourceTenantsCopy := copyAndSort(g.sourceTenants)
|
||||
|
||||
for i := range ngSourceTenantsCopy {
|
||||
if gSourceTenantsCopy[i] != ngSourceTenantsCopy[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// GroupKey returns the key identifying a rule group: the file name and the
// group name joined by ";". Group names need not be unique across filenames.
func GroupKey(file, name string) string {
	const sep = ";"
	return file + sep + name
}
|
||||
|
||||
// Constants for instrumentation.
|
||||
const namespace = "prometheus"
|
||||
|
||||
// Metrics for rule evaluation.
|
||||
type Metrics struct {
|
||||
EvalDuration prometheus.Summary
|
||||
IterationDuration prometheus.Summary
|
||||
IterationsMissed *prometheus.CounterVec
|
||||
IterationsScheduled *prometheus.CounterVec
|
||||
EvalTotal *prometheus.CounterVec
|
||||
EvalFailures *prometheus.CounterVec
|
||||
GroupInterval *prometheus.GaugeVec
|
||||
GroupLastEvalTime *prometheus.GaugeVec
|
||||
GroupLastDuration *prometheus.GaugeVec
|
||||
GroupRules *prometheus.GaugeVec
|
||||
GroupSamples *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
// NewGroupMetrics creates a new instance of Metrics and registers it with the provided registerer,
|
||||
// if not nil.
|
||||
func NewGroupMetrics(reg prometheus.Registerer) *Metrics {
|
||||
m := &Metrics{
|
||||
EvalDuration: prometheus.NewSummary(
|
||||
prometheus.SummaryOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_evaluation_duration_seconds",
|
||||
Help: "The duration for a rule to execute.",
|
||||
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
|
||||
}),
|
||||
IterationDuration: prometheus.NewSummary(prometheus.SummaryOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_group_duration_seconds",
|
||||
Help: "The duration of rule group evaluations.",
|
||||
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
|
||||
}),
|
||||
IterationsMissed: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_group_iterations_missed_total",
|
||||
Help: "The total number of rule group evaluations missed due to slow rule group evaluation.",
|
||||
},
|
||||
[]string{"rule_group"},
|
||||
),
|
||||
IterationsScheduled: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_group_iterations_total",
|
||||
Help: "The total number of scheduled rule group evaluations, whether executed or missed.",
|
||||
},
|
||||
[]string{"rule_group"},
|
||||
),
|
||||
EvalTotal: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_evaluations_total",
|
||||
Help: "The total number of rule evaluations.",
|
||||
},
|
||||
[]string{"rule_group"},
|
||||
),
|
||||
EvalFailures: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_evaluation_failures_total",
|
||||
Help: "The total number of rule evaluation failures.",
|
||||
},
|
||||
[]string{"rule_group"},
|
||||
),
|
||||
GroupInterval: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_group_interval_seconds",
|
||||
Help: "The interval of a rule group.",
|
||||
},
|
||||
[]string{"rule_group"},
|
||||
),
|
||||
GroupLastEvalTime: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_group_last_evaluation_timestamp_seconds",
|
||||
Help: "The timestamp of the last rule group evaluation in seconds.",
|
||||
},
|
||||
[]string{"rule_group"},
|
||||
),
|
||||
GroupLastDuration: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_group_last_duration_seconds",
|
||||
Help: "The duration of the last rule group evaluation.",
|
||||
},
|
||||
[]string{"rule_group"},
|
||||
),
|
||||
GroupRules: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_group_rules",
|
||||
Help: "The number of rules.",
|
||||
},
|
||||
[]string{"rule_group"},
|
||||
),
|
||||
GroupSamples: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_group_last_evaluation_samples",
|
||||
Help: "The number of samples returned during the last rule group evaluation.",
|
||||
},
|
||||
[]string{"rule_group"},
|
||||
),
|
||||
}
|
||||
|
||||
if reg != nil {
|
||||
reg.MustRegister(
|
||||
m.EvalDuration,
|
||||
m.IterationDuration,
|
||||
m.IterationsMissed,
|
||||
m.IterationsScheduled,
|
||||
m.EvalTotal,
|
||||
m.EvalFailures,
|
||||
m.GroupInterval,
|
||||
m.GroupLastEvalTime,
|
||||
m.GroupLastDuration,
|
||||
m.GroupRules,
|
||||
m.GroupSamples,
|
||||
)
|
||||
}
|
||||
|
||||
return m
|
||||
}
|
932
rules/manager.go
932
rules/manager.go
|
@ -17,9 +17,7 @@ import (
|
|||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"net/url"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
@ -27,162 +25,17 @@ import (
|
|||
"github.com/go-kit/log"
|
||||
"github.com/go-kit/log/level"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/model"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
"golang.org/x/exp/slices"
|
||||
|
||||
"github.com/prometheus/prometheus/model/labels"
|
||||
"github.com/prometheus/prometheus/model/rulefmt"
|
||||
"github.com/prometheus/prometheus/model/timestamp"
|
||||
"github.com/prometheus/prometheus/model/value"
|
||||
"github.com/prometheus/prometheus/notifier"
|
||||
"github.com/prometheus/prometheus/promql"
|
||||
"github.com/prometheus/prometheus/promql/parser"
|
||||
"github.com/prometheus/prometheus/storage"
|
||||
"github.com/prometheus/prometheus/tsdb/chunkenc"
|
||||
"github.com/prometheus/prometheus/util/strutil"
|
||||
)
|
||||
|
||||
// RuleHealth describes the health state of a rule.
|
||||
type RuleHealth string
|
||||
|
||||
// The possible health states of a rule based on the last execution.
|
||||
const (
|
||||
HealthUnknown RuleHealth = "unknown"
|
||||
HealthGood RuleHealth = "ok"
|
||||
HealthBad RuleHealth = "err"
|
||||
)
|
||||
|
||||
// Constants for instrumentation.
|
||||
const namespace = "prometheus"
|
||||
|
||||
// Metrics for rule evaluation.
|
||||
type Metrics struct {
|
||||
EvalDuration prometheus.Summary
|
||||
IterationDuration prometheus.Summary
|
||||
IterationsMissed *prometheus.CounterVec
|
||||
IterationsScheduled *prometheus.CounterVec
|
||||
EvalTotal *prometheus.CounterVec
|
||||
EvalFailures *prometheus.CounterVec
|
||||
GroupInterval *prometheus.GaugeVec
|
||||
GroupLastEvalTime *prometheus.GaugeVec
|
||||
GroupLastDuration *prometheus.GaugeVec
|
||||
GroupRules *prometheus.GaugeVec
|
||||
GroupSamples *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
// NewGroupMetrics creates a new instance of Metrics and registers it with the provided registerer,
|
||||
// if not nil.
|
||||
func NewGroupMetrics(reg prometheus.Registerer) *Metrics {
|
||||
m := &Metrics{
|
||||
EvalDuration: prometheus.NewSummary(
|
||||
prometheus.SummaryOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_evaluation_duration_seconds",
|
||||
Help: "The duration for a rule to execute.",
|
||||
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
|
||||
}),
|
||||
IterationDuration: prometheus.NewSummary(prometheus.SummaryOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_group_duration_seconds",
|
||||
Help: "The duration of rule group evaluations.",
|
||||
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
|
||||
}),
|
||||
IterationsMissed: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_group_iterations_missed_total",
|
||||
Help: "The total number of rule group evaluations missed due to slow rule group evaluation.",
|
||||
},
|
||||
[]string{"rule_group"},
|
||||
),
|
||||
IterationsScheduled: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_group_iterations_total",
|
||||
Help: "The total number of scheduled rule group evaluations, whether executed or missed.",
|
||||
},
|
||||
[]string{"rule_group"},
|
||||
),
|
||||
EvalTotal: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_evaluations_total",
|
||||
Help: "The total number of rule evaluations.",
|
||||
},
|
||||
[]string{"rule_group"},
|
||||
),
|
||||
EvalFailures: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_evaluation_failures_total",
|
||||
Help: "The total number of rule evaluation failures.",
|
||||
},
|
||||
[]string{"rule_group"},
|
||||
),
|
||||
GroupInterval: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_group_interval_seconds",
|
||||
Help: "The interval of a rule group.",
|
||||
},
|
||||
[]string{"rule_group"},
|
||||
),
|
||||
GroupLastEvalTime: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_group_last_evaluation_timestamp_seconds",
|
||||
Help: "The timestamp of the last rule group evaluation in seconds.",
|
||||
},
|
||||
[]string{"rule_group"},
|
||||
),
|
||||
GroupLastDuration: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_group_last_duration_seconds",
|
||||
Help: "The duration of the last rule group evaluation.",
|
||||
},
|
||||
[]string{"rule_group"},
|
||||
),
|
||||
GroupRules: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_group_rules",
|
||||
Help: "The number of rules.",
|
||||
},
|
||||
[]string{"rule_group"},
|
||||
),
|
||||
GroupSamples: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "rule_group_last_evaluation_samples",
|
||||
Help: "The number of samples returned during the last rule group evaluation.",
|
||||
},
|
||||
[]string{"rule_group"},
|
||||
),
|
||||
}
|
||||
|
||||
if reg != nil {
|
||||
reg.MustRegister(
|
||||
m.EvalDuration,
|
||||
m.IterationDuration,
|
||||
m.IterationsMissed,
|
||||
m.IterationsScheduled,
|
||||
m.EvalTotal,
|
||||
m.EvalFailures,
|
||||
m.GroupInterval,
|
||||
m.GroupLastEvalTime,
|
||||
m.GroupLastDuration,
|
||||
m.GroupRules,
|
||||
m.GroupSamples,
|
||||
)
|
||||
}
|
||||
|
||||
return m
|
||||
}
|
||||
|
||||
// QueryFunc processes PromQL queries.
|
||||
type QueryFunc func(ctx context.Context, q string, t time.Time) (promql.Vector, error)
|
||||
|
||||
|
@ -214,256 +67,6 @@ func EngineQueryFunc(engine *promql.Engine, q storage.Queryable) QueryFunc {
|
|||
}
|
||||
}
|
||||
|
||||
// A Rule encapsulates a vector expression which is evaluated at a specified
|
||||
// interval and acted upon (currently either recorded or used for alerting).
|
||||
type Rule interface {
|
||||
Name() string
|
||||
// Labels of the rule.
|
||||
Labels() labels.Labels
|
||||
// Eval evaluates the rule, including any associated recording or alerting actions.
|
||||
// The duration passed is the evaluation delay.
|
||||
Eval(context.Context, time.Duration, time.Time, QueryFunc, *url.URL, int) (promql.Vector, error)
|
||||
// String returns a human-readable string representation of the rule.
|
||||
String() string
|
||||
// Query returns the rule query expression.
|
||||
Query() parser.Expr
|
||||
// SetLastError sets the current error experienced by the rule.
|
||||
SetLastError(error)
|
||||
// LastError returns the last error experienced by the rule.
|
||||
LastError() error
|
||||
// SetHealth sets the current health of the rule.
|
||||
SetHealth(RuleHealth)
|
||||
// Health returns the current health of the rule.
|
||||
Health() RuleHealth
|
||||
SetEvaluationDuration(time.Duration)
|
||||
// GetEvaluationDuration returns last evaluation duration.
|
||||
// NOTE: Used dynamically by rules.html template.
|
||||
GetEvaluationDuration() time.Duration
|
||||
SetEvaluationTimestamp(time.Time)
|
||||
// GetEvaluationTimestamp returns last evaluation timestamp.
|
||||
// NOTE: Used dynamically by rules.html template.
|
||||
GetEvaluationTimestamp() time.Time
|
||||
}
|
||||
|
||||
// Group is a set of rules that have a logical relation.
|
||||
type Group struct {
|
||||
name string
|
||||
file string
|
||||
interval time.Duration
|
||||
evaluationDelay *time.Duration
|
||||
limit int
|
||||
rules []Rule
|
||||
sourceTenants []string
|
||||
seriesInPreviousEval []map[string]labels.Labels // One per Rule.
|
||||
staleSeries []labels.Labels
|
||||
opts *ManagerOptions
|
||||
mtx sync.Mutex
|
||||
evaluationTime time.Duration
|
||||
lastEvaluation time.Time // Wall-clock time of most recent evaluation.
|
||||
lastEvalTimestamp time.Time // Time slot used for most recent evaluation.
|
||||
|
||||
shouldRestore bool
|
||||
|
||||
markStale bool
|
||||
done chan struct{}
|
||||
terminated chan struct{}
|
||||
managerDone chan struct{}
|
||||
|
||||
logger log.Logger
|
||||
|
||||
metrics *Metrics
|
||||
|
||||
// Rule group evaluation iteration function,
|
||||
// defaults to DefaultEvalIterationFunc.
|
||||
evalIterationFunc GroupEvalIterationFunc
|
||||
|
||||
alignEvaluationTimeOnInterval bool
|
||||
}
|
||||
|
||||
// GroupEvalIterationFunc is used to implement and extend rule group
|
||||
// evaluation iteration logic. It is configured in Group.evalIterationFunc,
|
||||
// and periodically invoked at each group evaluation interval to
|
||||
// evaluate the rules in the group at that point in time.
|
||||
// DefaultEvalIterationFunc is the default implementation.
|
||||
type GroupEvalIterationFunc func(ctx context.Context, g *Group, evalTimestamp time.Time)
|
||||
|
||||
type GroupOptions struct {
|
||||
Name, File string
|
||||
Interval time.Duration
|
||||
Limit int
|
||||
Rules []Rule
|
||||
SourceTenants []string
|
||||
ShouldRestore bool
|
||||
Opts *ManagerOptions
|
||||
EvaluationDelay *time.Duration
|
||||
done chan struct{}
|
||||
EvalIterationFunc GroupEvalIterationFunc
|
||||
AlignEvaluationTimeOnInterval bool
|
||||
}
|
||||
|
||||
// NewGroup makes a new Group with the given name, options, and rules.
|
||||
func NewGroup(o GroupOptions) *Group {
|
||||
metrics := o.Opts.Metrics
|
||||
if metrics == nil {
|
||||
metrics = NewGroupMetrics(o.Opts.Registerer)
|
||||
}
|
||||
|
||||
key := GroupKey(o.File, o.Name)
|
||||
metrics.IterationsMissed.WithLabelValues(key)
|
||||
metrics.IterationsScheduled.WithLabelValues(key)
|
||||
metrics.EvalTotal.WithLabelValues(key)
|
||||
metrics.EvalFailures.WithLabelValues(key)
|
||||
metrics.GroupLastEvalTime.WithLabelValues(key)
|
||||
metrics.GroupLastDuration.WithLabelValues(key)
|
||||
metrics.GroupRules.WithLabelValues(key).Set(float64(len(o.Rules)))
|
||||
metrics.GroupSamples.WithLabelValues(key)
|
||||
metrics.GroupInterval.WithLabelValues(key).Set(o.Interval.Seconds())
|
||||
|
||||
evalIterationFunc := o.EvalIterationFunc
|
||||
if evalIterationFunc == nil {
|
||||
evalIterationFunc = DefaultEvalIterationFunc
|
||||
}
|
||||
|
||||
return &Group{
|
||||
name: o.Name,
|
||||
file: o.File,
|
||||
interval: o.Interval,
|
||||
evaluationDelay: o.EvaluationDelay,
|
||||
limit: o.Limit,
|
||||
rules: o.Rules,
|
||||
shouldRestore: o.ShouldRestore,
|
||||
opts: o.Opts,
|
||||
sourceTenants: o.SourceTenants,
|
||||
seriesInPreviousEval: make([]map[string]labels.Labels, len(o.Rules)),
|
||||
done: make(chan struct{}),
|
||||
managerDone: o.done,
|
||||
terminated: make(chan struct{}),
|
||||
logger: log.With(o.Opts.Logger, "file", o.File, "group", o.Name),
|
||||
metrics: metrics,
|
||||
evalIterationFunc: evalIterationFunc,
|
||||
alignEvaluationTimeOnInterval: o.AlignEvaluationTimeOnInterval,
|
||||
}
|
||||
}
|
||||
|
||||
// Name returns the group name.
|
||||
func (g *Group) Name() string { return g.name }
|
||||
|
||||
// File returns the group's file.
|
||||
func (g *Group) File() string { return g.file }
|
||||
|
||||
// Rules returns the group's rules.
|
||||
func (g *Group) Rules() []Rule { return g.rules }
|
||||
|
||||
// Queryable returns the group's querable.
|
||||
func (g *Group) Queryable() storage.Queryable { return g.opts.Queryable }
|
||||
|
||||
// Context returns the group's context.
|
||||
func (g *Group) Context() context.Context { return g.opts.Context }
|
||||
|
||||
// Interval returns the group's interval.
|
||||
func (g *Group) Interval() time.Duration { return g.interval }
|
||||
|
||||
// Limit returns the group's limit.
|
||||
func (g *Group) Limit() int { return g.limit }
|
||||
|
||||
// SourceTenants returns the source tenants for the group.
|
||||
// If it's empty or nil, then the owning user/tenant is considered to be the source tenant.
|
||||
func (g *Group) SourceTenants() []string { return g.sourceTenants }
|
||||
|
||||
func (g *Group) Logger() log.Logger { return g.logger }
|
||||
|
||||
func (g *Group) run(ctx context.Context) {
|
||||
defer close(g.terminated)
|
||||
|
||||
// Wait an initial amount to have consistently slotted intervals.
|
||||
evalTimestamp := g.EvalTimestamp(time.Now().UnixNano()).Add(g.interval)
|
||||
select {
|
||||
case <-time.After(time.Until(evalTimestamp)):
|
||||
case <-g.done:
|
||||
return
|
||||
}
|
||||
|
||||
ctx = promql.NewOriginContext(ctx, map[string]interface{}{
|
||||
"ruleGroup": map[string]string{
|
||||
"file": g.File(),
|
||||
"name": g.Name(),
|
||||
},
|
||||
})
|
||||
|
||||
// The assumption here is that since the ticker was started after having
|
||||
// waited for `evalTimestamp` to pass, the ticks will trigger soon
|
||||
// after each `evalTimestamp + N * g.interval` occurrence.
|
||||
tick := time.NewTicker(g.interval)
|
||||
defer tick.Stop()
|
||||
|
||||
defer func() {
|
||||
if !g.markStale {
|
||||
return
|
||||
}
|
||||
go func(now time.Time) {
|
||||
for _, rule := range g.seriesInPreviousEval {
|
||||
for _, r := range rule {
|
||||
g.staleSeries = append(g.staleSeries, r)
|
||||
}
|
||||
}
|
||||
// That can be garbage collected at this point.
|
||||
g.seriesInPreviousEval = nil
|
||||
// Wait for 2 intervals to give the opportunity to renamed rules
|
||||
// to insert new series in the tsdb. At this point if there is a
|
||||
// renamed rule, it should already be started.
|
||||
select {
|
||||
case <-g.managerDone:
|
||||
case <-time.After(2 * g.interval):
|
||||
g.cleanupStaleSeries(ctx, now)
|
||||
}
|
||||
}(time.Now())
|
||||
}()
|
||||
|
||||
g.evalIterationFunc(ctx, g, evalTimestamp)
|
||||
if g.shouldRestore {
|
||||
// If we have to restore, we wait for another Eval to finish.
|
||||
// The reason behind this is, during first eval (or before it)
|
||||
// we might not have enough data scraped, and recording rules would not
|
||||
// have updated the latest values, on which some alerts might depend.
|
||||
select {
|
||||
case <-g.done:
|
||||
return
|
||||
case <-tick.C:
|
||||
missed := (time.Since(evalTimestamp) / g.interval) - 1
|
||||
if missed > 0 {
|
||||
g.metrics.IterationsMissed.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed))
|
||||
g.metrics.IterationsScheduled.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed))
|
||||
}
|
||||
evalTimestamp = evalTimestamp.Add((missed + 1) * g.interval)
|
||||
g.evalIterationFunc(ctx, g, evalTimestamp)
|
||||
}
|
||||
|
||||
g.RestoreForState(time.Now())
|
||||
g.shouldRestore = false
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-g.done:
|
||||
return
|
||||
default:
|
||||
select {
|
||||
case <-g.done:
|
||||
return
|
||||
case <-tick.C:
|
||||
missed := (time.Since(evalTimestamp) / g.interval) - 1
|
||||
if missed > 0 {
|
||||
g.metrics.IterationsMissed.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed))
|
||||
g.metrics.IterationsScheduled.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed))
|
||||
}
|
||||
evalTimestamp = evalTimestamp.Add((missed + 1) * g.interval)
|
||||
|
||||
g.evalIterationFunc(ctx, g, evalTimestamp)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// DefaultEvalIterationFunc is the default implementation of
|
||||
// GroupEvalIterationFunc that is periodically invoked to evaluate the rules
|
||||
// in a group at a given point in time and updates Group state and metrics
|
||||
|
@ -483,536 +86,6 @@ func DefaultEvalIterationFunc(ctx context.Context, g *Group, evalTimestamp time.
|
|||
g.setLastEvalTimestamp(evalTimestamp)
|
||||
}
|
||||
|
||||
func (g *Group) stop() {
|
||||
close(g.done)
|
||||
<-g.terminated
|
||||
}
|
||||
|
||||
func (g *Group) hash() uint64 {
|
||||
l := labels.New(
|
||||
labels.Label{Name: "name", Value: g.name},
|
||||
labels.Label{Name: "file", Value: g.file},
|
||||
)
|
||||
return l.Hash()
|
||||
}
|
||||
|
||||
// AlertingRules returns the list of the group's alerting rules.
|
||||
func (g *Group) AlertingRules() []*AlertingRule {
|
||||
g.mtx.Lock()
|
||||
defer g.mtx.Unlock()
|
||||
|
||||
var alerts []*AlertingRule
|
||||
for _, rule := range g.rules {
|
||||
if alertingRule, ok := rule.(*AlertingRule); ok {
|
||||
alerts = append(alerts, alertingRule)
|
||||
}
|
||||
}
|
||||
slices.SortFunc(alerts, func(a, b *AlertingRule) int {
|
||||
if a.State() == b.State() {
|
||||
return strings.Compare(a.Name(), b.Name())
|
||||
}
|
||||
return int(b.State() - a.State())
|
||||
})
|
||||
return alerts
|
||||
}
|
||||
|
||||
// HasAlertingRules returns true if the group contains at least one AlertingRule.
|
||||
func (g *Group) HasAlertingRules() bool {
|
||||
g.mtx.Lock()
|
||||
defer g.mtx.Unlock()
|
||||
|
||||
for _, rule := range g.rules {
|
||||
if _, ok := rule.(*AlertingRule); ok {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// GetEvaluationTime returns the time in seconds it took to evaluate the rule group.
|
||||
func (g *Group) GetEvaluationTime() time.Duration {
|
||||
g.mtx.Lock()
|
||||
defer g.mtx.Unlock()
|
||||
return g.evaluationTime
|
||||
}
|
||||
|
||||
// setEvaluationTime sets the time in seconds the last evaluation took.
|
||||
func (g *Group) setEvaluationTime(dur time.Duration) {
|
||||
g.metrics.GroupLastDuration.WithLabelValues(GroupKey(g.file, g.name)).Set(dur.Seconds())
|
||||
|
||||
g.mtx.Lock()
|
||||
defer g.mtx.Unlock()
|
||||
g.evaluationTime = dur
|
||||
}
|
||||
|
||||
// GetLastEvaluation returns the time the last evaluation of the rule group took place.
|
||||
func (g *Group) GetLastEvaluation() time.Time {
|
||||
g.mtx.Lock()
|
||||
defer g.mtx.Unlock()
|
||||
return g.lastEvaluation
|
||||
}
|
||||
|
||||
// setLastEvaluation updates evaluationTimestamp to the timestamp of when the rule group was last evaluated.
|
||||
func (g *Group) setLastEvaluation(ts time.Time) {
|
||||
g.metrics.GroupLastEvalTime.WithLabelValues(GroupKey(g.file, g.name)).Set(float64(ts.UnixNano()) / 1e9)
|
||||
|
||||
g.mtx.Lock()
|
||||
defer g.mtx.Unlock()
|
||||
g.lastEvaluation = ts
|
||||
}
|
||||
|
||||
// GetLastEvalTimestamp returns the timestamp of the last evaluation.
|
||||
func (g *Group) GetLastEvalTimestamp() time.Time {
|
||||
g.mtx.Lock()
|
||||
defer g.mtx.Unlock()
|
||||
return g.lastEvalTimestamp
|
||||
}
|
||||
|
||||
// setLastEvalTimestamp updates lastEvalTimestamp to the timestamp of the last evaluation.
|
||||
func (g *Group) setLastEvalTimestamp(ts time.Time) {
|
||||
g.mtx.Lock()
|
||||
defer g.mtx.Unlock()
|
||||
g.lastEvalTimestamp = ts
|
||||
}
|
||||
|
||||
// EvalTimestamp returns the immediately preceding consistently slotted evaluation time.
|
||||
func (g *Group) EvalTimestamp(startTime int64) time.Time {
|
||||
var offset int64
|
||||
if !g.alignEvaluationTimeOnInterval {
|
||||
offset = int64(g.hash() % uint64(g.interval))
|
||||
}
|
||||
var (
|
||||
// This group's evaluation times differ from the perfect time intervals by `offset` nanoseconds.
|
||||
// But we can only use `% interval` to align with the interval. And `% interval` will always
|
||||
// align with the perfect time intervals, instead of this group's. Because of this we add
|
||||
// `offset` _after_ aligning with the perfect time interval.
|
||||
//
|
||||
// There can be cases where adding `offset` to the perfect evaluation time can yield a
|
||||
// timestamp in the future, which is not what EvalTimestamp should do.
|
||||
// So we subtract one `offset` to make sure that `now - (now % interval) + offset` gives an
|
||||
// evaluation time in the past.
|
||||
adjNow = startTime - offset
|
||||
|
||||
// Adjust to perfect evaluation intervals.
|
||||
base = adjNow - (adjNow % int64(g.interval))
|
||||
|
||||
// Add one offset to randomize the evaluation times of this group.
|
||||
next = base + offset
|
||||
)
|
||||
|
||||
return time.Unix(0, next).UTC()
|
||||
}
|
||||
|
||||
func nameAndLabels(rule Rule) string {
|
||||
return rule.Name() + rule.Labels().String()
|
||||
}
|
||||
|
||||
// CopyState copies the alerting rule and staleness related state from the given group.
|
||||
//
|
||||
// Rules are matched based on their name and labels. If there are duplicates, the
|
||||
// first is matched with the first, second with the second etc.
|
||||
func (g *Group) CopyState(from *Group) {
|
||||
g.evaluationTime = from.evaluationTime
|
||||
g.lastEvaluation = from.lastEvaluation
|
||||
|
||||
ruleMap := make(map[string][]int, len(from.rules))
|
||||
|
||||
for fi, fromRule := range from.rules {
|
||||
nameAndLabels := nameAndLabels(fromRule)
|
||||
l := ruleMap[nameAndLabels]
|
||||
ruleMap[nameAndLabels] = append(l, fi)
|
||||
}
|
||||
|
||||
for i, rule := range g.rules {
|
||||
nameAndLabels := nameAndLabels(rule)
|
||||
indexes := ruleMap[nameAndLabels]
|
||||
if len(indexes) == 0 {
|
||||
continue
|
||||
}
|
||||
fi := indexes[0]
|
||||
g.seriesInPreviousEval[i] = from.seriesInPreviousEval[fi]
|
||||
ruleMap[nameAndLabels] = indexes[1:]
|
||||
|
||||
ar, ok := rule.(*AlertingRule)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
far, ok := from.rules[fi].(*AlertingRule)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
for fp, a := range far.active {
|
||||
ar.active[fp] = a
|
||||
}
|
||||
}
|
||||
|
||||
// Handle deleted and unmatched duplicate rules.
|
||||
g.staleSeries = from.staleSeries
|
||||
for fi, fromRule := range from.rules {
|
||||
nameAndLabels := nameAndLabels(fromRule)
|
||||
l := ruleMap[nameAndLabels]
|
||||
if len(l) != 0 {
|
||||
for _, series := range from.seriesInPreviousEval[fi] {
|
||||
g.staleSeries = append(g.staleSeries, series)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Eval runs a single evaluation cycle in which all rules are evaluated sequentially.
|
||||
func (g *Group) Eval(ctx context.Context, ts time.Time) {
|
||||
var samplesTotal float64
|
||||
evaluationDelay := g.EvaluationDelay()
|
||||
for i, rule := range g.rules {
|
||||
select {
|
||||
case <-g.done:
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
func(i int, rule Rule) {
|
||||
logger := log.WithPrefix(g.logger, "name", rule.Name(), "index", i)
|
||||
ctx, sp := otel.Tracer("").Start(ctx, "rule")
|
||||
sp.SetAttributes(attribute.String("name", rule.Name()))
|
||||
defer func(t time.Time) {
|
||||
sp.End()
|
||||
|
||||
since := time.Since(t)
|
||||
g.metrics.EvalDuration.Observe(since.Seconds())
|
||||
rule.SetEvaluationDuration(since)
|
||||
rule.SetEvaluationTimestamp(t)
|
||||
}(time.Now())
|
||||
|
||||
if sp.SpanContext().IsSampled() && sp.SpanContext().HasTraceID() {
|
||||
logger = log.WithPrefix(g.logger, "traceID", sp.SpanContext().TraceID())
|
||||
}
|
||||
|
||||
g.metrics.EvalTotal.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
|
||||
|
||||
vector, err := rule.Eval(ctx, evaluationDelay, ts, g.opts.QueryFunc, g.opts.ExternalURL, g.Limit())
|
||||
if err != nil {
|
||||
rule.SetHealth(HealthBad)
|
||||
rule.SetLastError(err)
|
||||
sp.SetStatus(codes.Error, err.Error())
|
||||
g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
|
||||
|
||||
// Canceled queries are intentional termination of queries. This normally
|
||||
// happens on shutdown and thus we skip logging of any errors here.
|
||||
var eqc promql.ErrQueryCanceled
|
||||
if !errors.As(err, &eqc) {
|
||||
level.Warn(logger).Log("msg", "Evaluating rule failed", "rule", rule, "err", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
rule.SetHealth(HealthGood)
|
||||
rule.SetLastError(nil)
|
||||
samplesTotal += float64(len(vector))
|
||||
|
||||
if ar, ok := rule.(*AlertingRule); ok {
|
||||
ar.sendAlerts(ctx, ts, g.opts.ResendDelay, g.interval, g.opts.NotifyFunc)
|
||||
}
|
||||
var (
|
||||
numOutOfOrder = 0
|
||||
numTooOld = 0
|
||||
numDuplicates = 0
|
||||
)
|
||||
|
||||
app := g.opts.Appendable.Appender(ctx)
|
||||
seriesReturned := make(map[string]labels.Labels, len(g.seriesInPreviousEval[i]))
|
||||
defer func() {
|
||||
if err := app.Commit(); err != nil {
|
||||
rule.SetHealth(HealthBad)
|
||||
rule.SetLastError(err)
|
||||
sp.SetStatus(codes.Error, err.Error())
|
||||
g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
|
||||
|
||||
level.Warn(logger).Log("msg", "Rule sample appending failed", "err", err)
|
||||
return
|
||||
}
|
||||
g.seriesInPreviousEval[i] = seriesReturned
|
||||
}()
|
||||
|
||||
for _, s := range vector {
|
||||
if s.H != nil {
|
||||
_, err = app.AppendHistogram(0, s.Metric, s.T, nil, s.H)
|
||||
} else {
|
||||
_, err = app.Append(0, s.Metric, s.T, s.F)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
rule.SetHealth(HealthBad)
|
||||
rule.SetLastError(err)
|
||||
sp.SetStatus(codes.Error, err.Error())
|
||||
unwrappedErr := errors.Unwrap(err)
|
||||
if unwrappedErr == nil {
|
||||
unwrappedErr = err
|
||||
}
|
||||
switch {
|
||||
case errors.Is(unwrappedErr, storage.ErrOutOfOrderSample):
|
||||
numOutOfOrder++
|
||||
level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
|
||||
case errors.Is(unwrappedErr, storage.ErrTooOldSample):
|
||||
numTooOld++
|
||||
level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
|
||||
case errors.Is(unwrappedErr, storage.ErrDuplicateSampleForTimestamp):
|
||||
numDuplicates++
|
||||
level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
|
||||
default:
|
||||
level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
|
||||
}
|
||||
} else {
|
||||
buf := [1024]byte{}
|
||||
seriesReturned[string(s.Metric.Bytes(buf[:]))] = s.Metric
|
||||
}
|
||||
}
|
||||
if numOutOfOrder > 0 {
|
||||
level.Warn(logger).Log("msg", "Error on ingesting out-of-order result from rule evaluation", "numDropped", numOutOfOrder)
|
||||
}
|
||||
if numTooOld > 0 {
|
||||
level.Warn(logger).Log("msg", "Error on ingesting too old result from rule evaluation", "numDropped", numTooOld)
|
||||
}
|
||||
if numDuplicates > 0 {
|
||||
level.Warn(logger).Log("msg", "Error on ingesting results from rule evaluation with different value but same timestamp", "numDropped", numDuplicates)
|
||||
}
|
||||
|
||||
for metric, lset := range g.seriesInPreviousEval[i] {
|
||||
if _, ok := seriesReturned[metric]; !ok {
|
||||
// Series no longer exposed, mark it stale.
|
||||
_, err = app.Append(0, lset, timestamp.FromTime(ts.Add(-evaluationDelay)), math.Float64frombits(value.StaleNaN))
|
||||
unwrappedErr := errors.Unwrap(err)
|
||||
if unwrappedErr == nil {
|
||||
unwrappedErr = err
|
||||
}
|
||||
switch {
|
||||
case unwrappedErr == nil:
|
||||
case errors.Is(unwrappedErr, storage.ErrOutOfOrderSample),
|
||||
errors.Is(unwrappedErr, storage.ErrTooOldSample),
|
||||
errors.Is(unwrappedErr, storage.ErrDuplicateSampleForTimestamp):
|
||||
// Do not count these in logging, as this is expected if series
|
||||
// is exposed from a different rule.
|
||||
default:
|
||||
level.Warn(logger).Log("msg", "Adding stale sample failed", "sample", lset.String(), "err", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}(i, rule)
|
||||
}
|
||||
if g.metrics != nil {
|
||||
g.metrics.GroupSamples.WithLabelValues(GroupKey(g.File(), g.Name())).Set(samplesTotal)
|
||||
}
|
||||
g.cleanupStaleSeries(ctx, ts)
|
||||
}
|
||||
|
||||
func (g *Group) EvaluationDelay() time.Duration {
|
||||
if g.evaluationDelay != nil {
|
||||
return *g.evaluationDelay
|
||||
}
|
||||
if g.opts.DefaultEvaluationDelay != nil {
|
||||
return g.opts.DefaultEvaluationDelay()
|
||||
}
|
||||
return time.Duration(0)
|
||||
}
|
||||
|
||||
func (g *Group) cleanupStaleSeries(ctx context.Context, ts time.Time) {
|
||||
if len(g.staleSeries) == 0 {
|
||||
return
|
||||
}
|
||||
app := g.opts.Appendable.Appender(ctx)
|
||||
evaluationDelay := g.EvaluationDelay()
|
||||
for _, s := range g.staleSeries {
|
||||
// Rule that produced series no longer configured, mark it stale.
|
||||
_, err := app.Append(0, s, timestamp.FromTime(ts.Add(-evaluationDelay)), math.Float64frombits(value.StaleNaN))
|
||||
unwrappedErr := errors.Unwrap(err)
|
||||
if unwrappedErr == nil {
|
||||
unwrappedErr = err
|
||||
}
|
||||
switch {
|
||||
case unwrappedErr == nil:
|
||||
case errors.Is(unwrappedErr, storage.ErrOutOfOrderSample),
|
||||
errors.Is(unwrappedErr, storage.ErrTooOldSample),
|
||||
errors.Is(unwrappedErr, storage.ErrDuplicateSampleForTimestamp):
|
||||
// Do not count these in logging, as this is expected if series
|
||||
// is exposed from a different rule.
|
||||
default:
|
||||
level.Warn(g.logger).Log("msg", "Adding stale sample for previous configuration failed", "sample", s, "err", err)
|
||||
}
|
||||
}
|
||||
if err := app.Commit(); err != nil {
|
||||
level.Warn(g.logger).Log("msg", "Stale sample appending for previous configuration failed", "err", err)
|
||||
} else {
|
||||
g.staleSeries = nil
|
||||
}
|
||||
}
|
||||
|
||||
// RestoreForState restores the 'for' state of the alerts
|
||||
// by looking up last ActiveAt from storage.
|
||||
func (g *Group) RestoreForState(ts time.Time) {
|
||||
maxtMS := int64(model.TimeFromUnixNano(ts.UnixNano()))
|
||||
// We allow restoration only if alerts were active before after certain time.
|
||||
mint := ts.Add(-g.opts.OutageTolerance)
|
||||
mintMS := int64(model.TimeFromUnixNano(mint.UnixNano()))
|
||||
q, err := g.opts.Queryable.Querier(mintMS, maxtMS)
|
||||
if err != nil {
|
||||
level.Error(g.logger).Log("msg", "Failed to get Querier", "err", err)
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if err := q.Close(); err != nil {
|
||||
level.Error(g.logger).Log("msg", "Failed to close Querier", "err", err)
|
||||
}
|
||||
}()
|
||||
|
||||
for _, rule := range g.Rules() {
|
||||
alertRule, ok := rule.(*AlertingRule)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
alertHoldDuration := alertRule.HoldDuration()
|
||||
if alertHoldDuration < g.opts.ForGracePeriod {
|
||||
// If alertHoldDuration is already less than grace period, we would not
|
||||
// like to make it wait for `g.opts.ForGracePeriod` time before firing.
|
||||
// Hence we skip restoration, which will make it wait for alertHoldDuration.
|
||||
alertRule.SetRestored(true)
|
||||
continue
|
||||
}
|
||||
|
||||
alertRule.ForEachActiveAlert(func(a *Alert) {
|
||||
var s storage.Series
|
||||
|
||||
s, err := alertRule.QueryforStateSeries(g.opts.Context, a, q)
|
||||
if err != nil {
|
||||
// Querier Warnings are ignored. We do not care unless we have an error.
|
||||
level.Error(g.logger).Log(
|
||||
"msg", "Failed to restore 'for' state",
|
||||
labels.AlertName, alertRule.Name(),
|
||||
"stage", "Select",
|
||||
"err", err,
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
if s == nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Series found for the 'for' state.
|
||||
var t int64
|
||||
var v float64
|
||||
it := s.Iterator(nil)
|
||||
for it.Next() == chunkenc.ValFloat {
|
||||
t, v = it.At()
|
||||
}
|
||||
if it.Err() != nil {
|
||||
level.Error(g.logger).Log("msg", "Failed to restore 'for' state",
|
||||
labels.AlertName, alertRule.Name(), "stage", "Iterator", "err", it.Err())
|
||||
return
|
||||
}
|
||||
if value.IsStaleNaN(v) { // Alert was not active.
|
||||
return
|
||||
}
|
||||
|
||||
downAt := time.Unix(t/1000, 0).UTC()
|
||||
restoredActiveAt := time.Unix(int64(v), 0).UTC()
|
||||
timeSpentPending := downAt.Sub(restoredActiveAt)
|
||||
timeRemainingPending := alertHoldDuration - timeSpentPending
|
||||
|
||||
switch {
|
||||
case timeRemainingPending <= 0:
|
||||
// It means that alert was firing when prometheus went down.
|
||||
// In the next Eval, the state of this alert will be set back to
|
||||
// firing again if it's still firing in that Eval.
|
||||
// Nothing to be done in this case.
|
||||
case timeRemainingPending < g.opts.ForGracePeriod:
|
||||
// (new) restoredActiveAt = (ts + m.opts.ForGracePeriod) - alertHoldDuration
|
||||
// /* new firing time */ /* moving back by hold duration */
|
||||
//
|
||||
// Proof of correctness:
|
||||
// firingTime = restoredActiveAt.Add(alertHoldDuration)
|
||||
// = ts + m.opts.ForGracePeriod - alertHoldDuration + alertHoldDuration
|
||||
// = ts + m.opts.ForGracePeriod
|
||||
//
|
||||
// Time remaining to fire = firingTime.Sub(ts)
|
||||
// = (ts + m.opts.ForGracePeriod) - ts
|
||||
// = m.opts.ForGracePeriod
|
||||
restoredActiveAt = ts.Add(g.opts.ForGracePeriod).Add(-alertHoldDuration)
|
||||
default:
|
||||
// By shifting ActiveAt to the future (ActiveAt + some_duration),
|
||||
// the total pending time from the original ActiveAt
|
||||
// would be `alertHoldDuration + some_duration`.
|
||||
// Here, some_duration = downDuration.
|
||||
downDuration := ts.Sub(downAt)
|
||||
restoredActiveAt = restoredActiveAt.Add(downDuration)
|
||||
}
|
||||
|
||||
a.ActiveAt = restoredActiveAt
|
||||
level.Debug(g.logger).Log("msg", "'for' state restored",
|
||||
labels.AlertName, alertRule.Name(), "restored_time", a.ActiveAt.Format(time.RFC850),
|
||||
"labels", a.Labels.String())
|
||||
})
|
||||
|
||||
alertRule.SetRestored(true)
|
||||
}
|
||||
}
|
||||
|
||||
// Equals return if two groups are the same.
|
||||
func (g *Group) Equals(ng *Group) bool {
|
||||
if g.name != ng.name {
|
||||
return false
|
||||
}
|
||||
|
||||
if g.file != ng.file {
|
||||
return false
|
||||
}
|
||||
|
||||
if g.interval != ng.interval {
|
||||
return false
|
||||
}
|
||||
|
||||
if g.limit != ng.limit {
|
||||
return false
|
||||
}
|
||||
|
||||
if len(g.rules) != len(ng.rules) {
|
||||
return false
|
||||
}
|
||||
|
||||
if g.alignEvaluationTimeOnInterval != ng.alignEvaluationTimeOnInterval {
|
||||
return false
|
||||
}
|
||||
|
||||
for i, gr := range g.rules {
|
||||
if gr.String() != ng.rules[i].String() {
|
||||
return false
|
||||
}
|
||||
}
|
||||
{
|
||||
// compare source tenants
|
||||
if len(g.sourceTenants) != len(ng.sourceTenants) {
|
||||
return false
|
||||
}
|
||||
|
||||
copyAndSort := func(x []string) []string {
|
||||
copied := make([]string, len(x))
|
||||
copy(copied, x)
|
||||
sort.Strings(copied)
|
||||
return copied
|
||||
}
|
||||
|
||||
ngSourceTenantsCopy := copyAndSort(ng.sourceTenants)
|
||||
gSourceTenantsCopy := copyAndSort(g.sourceTenants)
|
||||
|
||||
for i := range ngSourceTenantsCopy {
|
||||
if gSourceTenantsCopy[i] != ngSourceTenantsCopy[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// The Manager manages recording and alerting rules.
|
||||
type Manager struct {
|
||||
opts *ManagerOptions
|
||||
|
@ -1270,11 +343,6 @@ func (m *Manager) LoadGroups(
|
|||
return groups, nil
|
||||
}
|
||||
|
||||
// GroupKey returns the key identifying a rule group: the file name and the
// group name joined by ";". The file is part of the key because group names
// need not be unique across filenames.
func GroupKey(file, name string) string {
	const separator = ";"
	return file + separator + name
}
|
||||
|
||||
// RuleGroups returns the list of manager's rule groups.
|
||||
func (m *Manager) RuleGroups() []*Group {
|
||||
m.mtx.RLock()
|
||||
|
|
65
rules/rule.go
Normal file
65
rules/rule.go
Normal file
|
@ -0,0 +1,65 @@
|
|||
// Copyright 2013 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package rules
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/url"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/prometheus/model/labels"
|
||||
"github.com/prometheus/prometheus/promql"
|
||||
"github.com/prometheus/prometheus/promql/parser"
|
||||
)
|
||||
|
||||
// RuleHealth describes the health state of a rule.
type RuleHealth string

// The possible health states of a rule based on the last execution.
const (
	// HealthUnknown has the string value "unknown".
	HealthUnknown RuleHealth = "unknown"
	// HealthGood has the string value "ok".
	HealthGood RuleHealth = "ok"
	// HealthBad has the string value "err".
	HealthBad RuleHealth = "err"
)
|
||||
|
||||
// A Rule encapsulates a vector expression which is evaluated at a specified
// interval and acted upon (currently either recorded or used for alerting).
type Rule interface {
	// Name returns the name of the rule.
	Name() string
	// Labels of the rule.
	Labels() labels.Labels
	// Eval evaluates the rule, including any associated recording or alerting actions.
	// The duration passed is the evaluation delay.
	Eval(context.Context, time.Duration, time.Time, QueryFunc, *url.URL, int) (promql.Vector, error)
	// String returns a human-readable string representation of the rule.
	String() string
	// Query returns the rule query expression.
	Query() parser.Expr
	// SetLastError sets the current error experienced by the rule.
	SetLastError(error)
	// LastError returns the last error experienced by the rule.
	LastError() error
	// SetHealth sets the current health of the rule.
	SetHealth(RuleHealth)
	// Health returns the current health of the rule.
	Health() RuleHealth
	// SetEvaluationDuration sets the duration of the rule's last evaluation.
	SetEvaluationDuration(time.Duration)
	// GetEvaluationDuration returns last evaluation duration.
	// NOTE: Used dynamically by rules.html template.
	GetEvaluationDuration() time.Duration
	// SetEvaluationTimestamp sets the timestamp of the rule's last evaluation.
	SetEvaluationTimestamp(time.Time)
	// GetEvaluationTimestamp returns last evaluation timestamp.
	// NOTE: Used dynamically by rules.html template.
	GetEvaluationTimestamp() time.Time
}
|
|
@ -34,80 +34,20 @@ import (
|
|||
"github.com/prometheus/prometheus/util/osutil"
|
||||
)
|
||||
|
||||
// targetMetadataCache is the package-level metadata cache collector.
var targetMetadataCache = newMetadataMetricsCollector()

// MetadataMetricsCollector is a Custom Collector for the metadata cache metrics.
type MetadataMetricsCollector struct {
	// CacheEntries describes the prometheus_target_metadata_cache_entries metric.
	CacheEntries *prometheus.Desc
	// CacheBytes describes the prometheus_target_metadata_cache_bytes metric.
	CacheBytes *prometheus.Desc

	// scrapeManager is the manager whose active targets are inspected by
	// Collect. While it is nil, Collect emits nothing.
	scrapeManager *Manager
}
|
||||
|
||||
func newMetadataMetricsCollector() *MetadataMetricsCollector {
|
||||
return &MetadataMetricsCollector{
|
||||
CacheEntries: prometheus.NewDesc(
|
||||
"prometheus_target_metadata_cache_entries",
|
||||
"Total number of metric metadata entries in the cache",
|
||||
[]string{"scrape_job"},
|
||||
nil,
|
||||
),
|
||||
CacheBytes: prometheus.NewDesc(
|
||||
"prometheus_target_metadata_cache_bytes",
|
||||
"The number of bytes that are currently used for storing metric metadata in the cache",
|
||||
[]string{"scrape_job"},
|
||||
nil,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
func (mc *MetadataMetricsCollector) registerManager(m *Manager) {
|
||||
mc.scrapeManager = m
|
||||
}
|
||||
|
||||
// Describe sends the metrics descriptions to the channel.
|
||||
func (mc *MetadataMetricsCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||
ch <- mc.CacheEntries
|
||||
ch <- mc.CacheBytes
|
||||
}
|
||||
|
||||
// Collect creates and sends the metrics for the metadata cache.
|
||||
func (mc *MetadataMetricsCollector) Collect(ch chan<- prometheus.Metric) {
|
||||
if mc.scrapeManager == nil {
|
||||
return
|
||||
}
|
||||
|
||||
for tset, targets := range mc.scrapeManager.TargetsActive() {
|
||||
var size, length int
|
||||
for _, t := range targets {
|
||||
size += t.MetadataSize()
|
||||
length += t.MetadataLength()
|
||||
}
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
mc.CacheEntries,
|
||||
prometheus.GaugeValue,
|
||||
float64(length),
|
||||
tset,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
mc.CacheBytes,
|
||||
prometheus.GaugeValue,
|
||||
float64(size),
|
||||
tset,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// NewManager is the Manager constructor
|
||||
func NewManager(o *Options, logger log.Logger, app storage.Appendable) *Manager {
|
||||
func NewManager(o *Options, logger log.Logger, app storage.Appendable, registerer prometheus.Registerer) (*Manager, error) {
|
||||
if o == nil {
|
||||
o = &Options{}
|
||||
}
|
||||
if logger == nil {
|
||||
logger = log.NewNopLogger()
|
||||
}
|
||||
|
||||
sm, err := newScrapeMetrics(registerer)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create scrape manager due to error: %w", err)
|
||||
}
|
||||
|
||||
m := &Manager{
|
||||
append: app,
|
||||
opts: o,
|
||||
|
@ -116,10 +56,12 @@ func NewManager(o *Options, logger log.Logger, app storage.Appendable) *Manager
|
|||
scrapePools: make(map[string]*scrapePool),
|
||||
graceShut: make(chan struct{}),
|
||||
triggerReload: make(chan struct{}, 1),
|
||||
metrics: sm,
|
||||
}
|
||||
targetMetadataCache.registerManager(m)
|
||||
|
||||
return m
|
||||
m.metrics.setTargetMetadataCacheGatherer(m)
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Options are the configuration parameters to the scrape manager.
|
||||
|
@ -154,6 +96,8 @@ type Manager struct {
|
|||
targetSets map[string][]*targetgroup.Group
|
||||
|
||||
triggerReload chan struct{}
|
||||
|
||||
metrics *scrapeMetrics
|
||||
}
|
||||
|
||||
// Run receives and saves target set updates and triggers the scraping loops reloading.
|
||||
|
@ -211,8 +155,10 @@ func (m *Manager) reload() {
|
|||
level.Error(m.logger).Log("msg", "error reloading target set", "err", "invalid config id:"+setName)
|
||||
continue
|
||||
}
|
||||
sp, err := newScrapePool(scrapeConfig, m.append, m.offsetSeed, log.With(m.logger, "scrape_pool", setName), m.opts)
|
||||
m.metrics.targetScrapePools.Inc()
|
||||
sp, err := newScrapePool(scrapeConfig, m.append, m.offsetSeed, log.With(m.logger, "scrape_pool", setName), m.opts, m.metrics)
|
||||
if err != nil {
|
||||
m.metrics.targetScrapePoolsFailed.Inc()
|
||||
level.Error(m.logger).Log("msg", "error creating new scrape pool", "err", err, "scrape_pool", setName)
|
||||
continue
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/stretchr/testify/require"
|
||||
"gopkg.in/yaml.v2"
|
||||
|
@ -492,10 +493,13 @@ scrape_configs:
|
|||
cfg3 = loadConfiguration(t, cfgText3)
|
||||
|
||||
ch = make(chan struct{}, 1)
|
||||
|
||||
testRegistry = prometheus.NewRegistry()
|
||||
)
|
||||
|
||||
opts := Options{}
|
||||
scrapeManager := NewManager(&opts, nil, nil)
|
||||
scrapeManager, err := NewManager(&opts, nil, nil, testRegistry)
|
||||
require.NoError(t, err)
|
||||
newLoop := func(scrapeLoopOptions) loop {
|
||||
ch <- struct{}{}
|
||||
return noopLoop()
|
||||
|
@ -512,6 +516,7 @@ scrape_configs:
|
|||
logger: nil,
|
||||
config: cfg1.ScrapeConfigs[0],
|
||||
client: http.DefaultClient,
|
||||
metrics: scrapeManager.metrics,
|
||||
}
|
||||
scrapeManager.scrapePools = map[string]*scrapePool{
|
||||
"job1": sp,
|
||||
|
@ -560,7 +565,9 @@ scrape_configs:
|
|||
|
||||
func TestManagerTargetsUpdates(t *testing.T) {
|
||||
opts := Options{}
|
||||
m := NewManager(&opts, nil, nil)
|
||||
testRegistry := prometheus.NewRegistry()
|
||||
m, err := NewManager(&opts, nil, nil, testRegistry)
|
||||
require.NoError(t, err)
|
||||
|
||||
ts := make(chan map[string][]*targetgroup.Group)
|
||||
go m.Run(ts)
|
||||
|
@ -613,7 +620,9 @@ global:
|
|||
}
|
||||
|
||||
opts := Options{}
|
||||
scrapeManager := NewManager(&opts, nil, nil)
|
||||
testRegistry := prometheus.NewRegistry()
|
||||
scrapeManager, err := NewManager(&opts, nil, nil, testRegistry)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Load the first config.
|
||||
cfg1 := getConfig("ha1")
|
||||
|
@ -658,8 +667,9 @@ scrape_configs:
|
|||
- targets: ["foo:9093"]
|
||||
`
|
||||
var (
|
||||
cfg1 = loadConfiguration(t, cfgText1)
|
||||
cfg2 = loadConfiguration(t, cfgText2)
|
||||
cfg1 = loadConfiguration(t, cfgText1)
|
||||
cfg2 = loadConfiguration(t, cfgText2)
|
||||
testRegistry = prometheus.NewRegistry()
|
||||
)
|
||||
|
||||
reload := func(scrapeManager *Manager, cfg *config.Config) {
|
||||
|
@ -695,7 +705,8 @@ scrape_configs:
|
|||
}
|
||||
|
||||
opts := Options{}
|
||||
scrapeManager := NewManager(&opts, nil, nil)
|
||||
scrapeManager, err := NewManager(&opts, nil, nil, testRegistry)
|
||||
require.NoError(t, err)
|
||||
|
||||
reload(scrapeManager, cfg1)
|
||||
require.ElementsMatch(t, []string{"job1", "job2"}, scrapeManager.ScrapePools())
|
||||
|
|
307
scrape/metrics.go
Normal file
307
scrape/metrics.go
Normal file
|
@ -0,0 +1,307 @@
|
|||
// Copyright 2016 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scrape
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// scrapeMetrics holds the metrics of the scrape package, grouped by the
// component that uses them.
type scrapeMetrics struct {
	// Used by Manager.
	targetMetadataCache     *MetadataMetricsCollector
	targetScrapePools       prometheus.Counter
	targetScrapePoolsFailed prometheus.Counter

	// Used by scrapePool.
	targetReloadIntervalLength          *prometheus.SummaryVec
	targetScrapePoolReloads             prometheus.Counter
	targetScrapePoolReloadsFailed       prometheus.Counter
	targetScrapePoolSyncsCounter        *prometheus.CounterVec
	targetScrapePoolExceededTargetLimit prometheus.Counter
	targetScrapePoolTargetLimit         *prometheus.GaugeVec
	targetScrapePoolTargetsAdded        *prometheus.GaugeVec
	targetSyncIntervalLength            *prometheus.SummaryVec
	targetSyncFailed                    *prometheus.CounterVec

	// Used by targetScraper.
	targetScrapeExceededBodySizeLimit prometheus.Counter

	// Used by scrapeCache.
	targetScrapeCacheFlushForced prometheus.Counter

	// Used by scrapeLoop.
	targetIntervalLength                   *prometheus.SummaryVec
	targetScrapeSampleLimit                prometheus.Counter
	targetScrapeSampleDuplicate            prometheus.Counter
	targetScrapeSampleOutOfOrder           prometheus.Counter
	targetScrapeSampleOutOfBounds          prometheus.Counter
	targetScrapeExemplarOutOfOrder         prometheus.Counter
	targetScrapePoolExceededLabelLimits    prometheus.Counter
	targetScrapeNativeHistogramBucketLimit prometheus.Counter
}
|
||||
|
||||
func newScrapeMetrics(reg prometheus.Registerer) (*scrapeMetrics, error) {
|
||||
sm := &scrapeMetrics{}
|
||||
|
||||
// Manager metrics.
|
||||
sm.targetMetadataCache = &MetadataMetricsCollector{
|
||||
CacheEntries: prometheus.NewDesc(
|
||||
"prometheus_target_metadata_cache_entries",
|
||||
"Total number of metric metadata entries in the cache",
|
||||
[]string{"scrape_job"},
|
||||
nil,
|
||||
),
|
||||
CacheBytes: prometheus.NewDesc(
|
||||
"prometheus_target_metadata_cache_bytes",
|
||||
"The number of bytes that are currently used for storing metric metadata in the cache",
|
||||
[]string{"scrape_job"},
|
||||
nil,
|
||||
),
|
||||
// TargetsGatherer should be set later, because it's a circular dependency.
|
||||
// newScrapeMetrics() is called by NewManager(), while also TargetsGatherer is the new Manager.
|
||||
}
|
||||
|
||||
sm.targetScrapePools = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrape_pools_total",
|
||||
Help: "Total number of scrape pool creation attempts.",
|
||||
},
|
||||
)
|
||||
sm.targetScrapePoolsFailed = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrape_pools_failed_total",
|
||||
Help: "Total number of scrape pool creations that failed.",
|
||||
},
|
||||
)
|
||||
|
||||
// Used by scrapePool.
|
||||
sm.targetReloadIntervalLength = prometheus.NewSummaryVec(
|
||||
prometheus.SummaryOpts{
|
||||
Name: "prometheus_target_reload_length_seconds",
|
||||
Help: "Actual interval to reload the scrape pool with a given configuration.",
|
||||
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
|
||||
},
|
||||
[]string{"interval"},
|
||||
)
|
||||
sm.targetScrapePoolReloads = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrape_pool_reloads_total",
|
||||
Help: "Total number of scrape pool reloads.",
|
||||
},
|
||||
)
|
||||
sm.targetScrapePoolReloadsFailed = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrape_pool_reloads_failed_total",
|
||||
Help: "Total number of failed scrape pool reloads.",
|
||||
},
|
||||
)
|
||||
sm.targetScrapePoolExceededTargetLimit = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrape_pool_exceeded_target_limit_total",
|
||||
Help: "Total number of times scrape pools hit the target limit, during sync or config reload.",
|
||||
},
|
||||
)
|
||||
sm.targetScrapePoolTargetLimit = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "prometheus_target_scrape_pool_target_limit",
|
||||
Help: "Maximum number of targets allowed in this scrape pool.",
|
||||
},
|
||||
[]string{"scrape_job"},
|
||||
)
|
||||
sm.targetScrapePoolTargetsAdded = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "prometheus_target_scrape_pool_targets",
|
||||
Help: "Current number of targets in this scrape pool.",
|
||||
},
|
||||
[]string{"scrape_job"},
|
||||
)
|
||||
sm.targetScrapePoolSyncsCounter = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrape_pool_sync_total",
|
||||
Help: "Total number of syncs that were executed on a scrape pool.",
|
||||
},
|
||||
[]string{"scrape_job"},
|
||||
)
|
||||
sm.targetSyncIntervalLength = prometheus.NewSummaryVec(
|
||||
prometheus.SummaryOpts{
|
||||
Name: "prometheus_target_sync_length_seconds",
|
||||
Help: "Actual interval to sync the scrape pool.",
|
||||
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
|
||||
},
|
||||
[]string{"scrape_job"},
|
||||
)
|
||||
sm.targetSyncFailed = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_sync_failed_total",
|
||||
Help: "Total number of target sync failures.",
|
||||
},
|
||||
[]string{"scrape_job"},
|
||||
)
|
||||
|
||||
// Used by targetScraper.
|
||||
sm.targetScrapeExceededBodySizeLimit = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrapes_exceeded_body_size_limit_total",
|
||||
Help: "Total number of scrapes that hit the body size limit",
|
||||
},
|
||||
)
|
||||
|
||||
// Used by scrapeCache.
|
||||
sm.targetScrapeCacheFlushForced = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrapes_cache_flush_forced_total",
|
||||
Help: "How many times a scrape cache was flushed due to getting big while scrapes are failing.",
|
||||
},
|
||||
)
|
||||
|
||||
// Used by scrapeLoop.
|
||||
sm.targetIntervalLength = prometheus.NewSummaryVec(
|
||||
prometheus.SummaryOpts{
|
||||
Name: "prometheus_target_interval_length_seconds",
|
||||
Help: "Actual intervals between scrapes.",
|
||||
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
|
||||
},
|
||||
[]string{"interval"},
|
||||
)
|
||||
sm.targetScrapeSampleLimit = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrapes_exceeded_sample_limit_total",
|
||||
Help: "Total number of scrapes that hit the sample limit and were rejected.",
|
||||
},
|
||||
)
|
||||
sm.targetScrapeSampleDuplicate = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrapes_sample_duplicate_timestamp_total",
|
||||
Help: "Total number of samples rejected due to duplicate timestamps but different values.",
|
||||
},
|
||||
)
|
||||
sm.targetScrapeSampleOutOfOrder = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrapes_sample_out_of_order_total",
|
||||
Help: "Total number of samples rejected due to not being out of the expected order.",
|
||||
},
|
||||
)
|
||||
sm.targetScrapeSampleOutOfBounds = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrapes_sample_out_of_bounds_total",
|
||||
Help: "Total number of samples rejected due to timestamp falling outside of the time bounds.",
|
||||
},
|
||||
)
|
||||
sm.targetScrapePoolExceededLabelLimits = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrape_pool_exceeded_label_limits_total",
|
||||
Help: "Total number of times scrape pools hit the label limits, during sync or config reload.",
|
||||
},
|
||||
)
|
||||
sm.targetScrapeNativeHistogramBucketLimit = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrapes_exceeded_native_histogram_bucket_limit_total",
|
||||
Help: "Total number of scrapes that hit the native histogram bucket limit and were rejected.",
|
||||
},
|
||||
)
|
||||
sm.targetScrapeExemplarOutOfOrder = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrapes_exemplar_out_of_order_total",
|
||||
Help: "Total number of exemplar rejected due to not being out of the expected order.",
|
||||
},
|
||||
)
|
||||
|
||||
for _, collector := range []prometheus.Collector{
|
||||
// Used by Manager.
|
||||
sm.targetMetadataCache,
|
||||
sm.targetScrapePools,
|
||||
sm.targetScrapePoolsFailed,
|
||||
// Used by scrapePool.
|
||||
sm.targetReloadIntervalLength,
|
||||
sm.targetScrapePoolReloads,
|
||||
sm.targetScrapePoolReloadsFailed,
|
||||
sm.targetSyncIntervalLength,
|
||||
sm.targetScrapePoolSyncsCounter,
|
||||
sm.targetScrapePoolExceededTargetLimit,
|
||||
sm.targetScrapePoolTargetLimit,
|
||||
sm.targetScrapePoolTargetsAdded,
|
||||
sm.targetSyncFailed,
|
||||
// Used by targetScraper.
|
||||
sm.targetScrapeExceededBodySizeLimit,
|
||||
// Used by scrapeCache.
|
||||
sm.targetScrapeCacheFlushForced,
|
||||
// Used by scrapeLoop.
|
||||
sm.targetIntervalLength,
|
||||
sm.targetScrapeSampleLimit,
|
||||
sm.targetScrapeSampleDuplicate,
|
||||
sm.targetScrapeSampleOutOfOrder,
|
||||
sm.targetScrapeSampleOutOfBounds,
|
||||
sm.targetScrapeExemplarOutOfOrder,
|
||||
sm.targetScrapePoolExceededLabelLimits,
|
||||
sm.targetScrapeNativeHistogramBucketLimit,
|
||||
} {
|
||||
err := reg.Register(collector)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to register scrape metrics: %w", err)
|
||||
}
|
||||
}
|
||||
return sm, nil
|
||||
}
|
||||
|
||||
func (sm *scrapeMetrics) setTargetMetadataCacheGatherer(gatherer TargetsGatherer) {
|
||||
sm.targetMetadataCache.TargetsGatherer = gatherer
|
||||
}
|
||||
|
||||
type TargetsGatherer interface {
|
||||
TargetsActive() map[string][]*Target
|
||||
}
|
||||
|
||||
// MetadataMetricsCollector is a Custom Collector for the metadata cache metrics.
|
||||
type MetadataMetricsCollector struct {
|
||||
CacheEntries *prometheus.Desc
|
||||
CacheBytes *prometheus.Desc
|
||||
TargetsGatherer TargetsGatherer
|
||||
}
|
||||
|
||||
// Describe sends the metrics descriptions to the channel.
|
||||
func (mc *MetadataMetricsCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||
ch <- mc.CacheEntries
|
||||
ch <- mc.CacheBytes
|
||||
}
|
||||
|
||||
// Collect creates and sends the metrics for the metadata cache.
|
||||
func (mc *MetadataMetricsCollector) Collect(ch chan<- prometheus.Metric) {
|
||||
if mc.TargetsGatherer == nil {
|
||||
return
|
||||
}
|
||||
|
||||
for tset, targets := range mc.TargetsGatherer.TargetsActive() {
|
||||
var size, length int
|
||||
for _, t := range targets {
|
||||
size += t.MetadataSize()
|
||||
length += t.MetadataLength()
|
||||
}
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
mc.CacheEntries,
|
||||
prometheus.GaugeValue,
|
||||
float64(length),
|
||||
tset,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
mc.CacheBytes,
|
||||
prometheus.GaugeValue,
|
||||
float64(size),
|
||||
tset,
|
||||
)
|
||||
}
|
||||
}
|
245
scrape/scrape.go
245
scrape/scrape.go
|
@ -31,7 +31,6 @@ import (
|
|||
"github.com/go-kit/log"
|
||||
"github.com/go-kit/log/level"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
config_util "github.com/prometheus/common/config"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/prometheus/common/version"
|
||||
|
@ -61,172 +60,6 @@ var AlignScrapeTimestamps = true
|
|||
|
||||
var errNameLabelMandatory = fmt.Errorf("missing metric name (%s label)", labels.MetricName)
|
||||
|
||||
var (
|
||||
targetIntervalLength = prometheus.NewSummaryVec(
|
||||
prometheus.SummaryOpts{
|
||||
Name: "prometheus_target_interval_length_seconds",
|
||||
Help: "Actual intervals between scrapes.",
|
||||
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
|
||||
},
|
||||
[]string{"interval"},
|
||||
)
|
||||
targetReloadIntervalLength = prometheus.NewSummaryVec(
|
||||
prometheus.SummaryOpts{
|
||||
Name: "prometheus_target_reload_length_seconds",
|
||||
Help: "Actual interval to reload the scrape pool with a given configuration.",
|
||||
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
|
||||
},
|
||||
[]string{"interval"},
|
||||
)
|
||||
targetScrapePools = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrape_pools_total",
|
||||
Help: "Total number of scrape pool creation attempts.",
|
||||
},
|
||||
)
|
||||
targetScrapePoolsFailed = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrape_pools_failed_total",
|
||||
Help: "Total number of scrape pool creations that failed.",
|
||||
},
|
||||
)
|
||||
targetScrapePoolReloads = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrape_pool_reloads_total",
|
||||
Help: "Total number of scrape pool reloads.",
|
||||
},
|
||||
)
|
||||
targetScrapePoolReloadsFailed = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrape_pool_reloads_failed_total",
|
||||
Help: "Total number of failed scrape pool reloads.",
|
||||
},
|
||||
)
|
||||
targetScrapePoolExceededTargetLimit = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrape_pool_exceeded_target_limit_total",
|
||||
Help: "Total number of times scrape pools hit the target limit, during sync or config reload.",
|
||||
},
|
||||
)
|
||||
targetScrapePoolTargetLimit = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "prometheus_target_scrape_pool_target_limit",
|
||||
Help: "Maximum number of targets allowed in this scrape pool.",
|
||||
},
|
||||
[]string{"scrape_job"},
|
||||
)
|
||||
targetScrapePoolTargetsAdded = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "prometheus_target_scrape_pool_targets",
|
||||
Help: "Current number of targets in this scrape pool.",
|
||||
},
|
||||
[]string{"scrape_job"},
|
||||
)
|
||||
targetSyncIntervalLength = prometheus.NewSummaryVec(
|
||||
prometheus.SummaryOpts{
|
||||
Name: "prometheus_target_sync_length_seconds",
|
||||
Help: "Actual interval to sync the scrape pool.",
|
||||
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
|
||||
},
|
||||
[]string{"scrape_job"},
|
||||
)
|
||||
targetScrapePoolSyncsCounter = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrape_pool_sync_total",
|
||||
Help: "Total number of syncs that were executed on a scrape pool.",
|
||||
},
|
||||
[]string{"scrape_job"},
|
||||
)
|
||||
targetScrapeExceededBodySizeLimit = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrapes_exceeded_body_size_limit_total",
|
||||
Help: "Total number of scrapes that hit the body size limit",
|
||||
},
|
||||
)
|
||||
targetScrapeSampleLimit = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrapes_exceeded_sample_limit_total",
|
||||
Help: "Total number of scrapes that hit the sample limit and were rejected.",
|
||||
},
|
||||
)
|
||||
targetScrapeSampleDuplicate = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrapes_sample_duplicate_timestamp_total",
|
||||
Help: "Total number of samples rejected due to duplicate timestamps but different values.",
|
||||
},
|
||||
)
|
||||
targetScrapeSampleOutOfOrder = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrapes_sample_out_of_order_total",
|
||||
Help: "Total number of samples rejected due to not being out of the expected order.",
|
||||
},
|
||||
)
|
||||
targetScrapeSampleOutOfBounds = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrapes_sample_out_of_bounds_total",
|
||||
Help: "Total number of samples rejected due to timestamp falling outside of the time bounds.",
|
||||
},
|
||||
)
|
||||
targetScrapeCacheFlushForced = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrapes_cache_flush_forced_total",
|
||||
Help: "How many times a scrape cache was flushed due to getting big while scrapes are failing.",
|
||||
},
|
||||
)
|
||||
targetScrapeExemplarOutOfOrder = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrapes_exemplar_out_of_order_total",
|
||||
Help: "Total number of exemplar rejected due to not being out of the expected order.",
|
||||
},
|
||||
)
|
||||
targetScrapePoolExceededLabelLimits = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrape_pool_exceeded_label_limits_total",
|
||||
Help: "Total number of times scrape pools hit the label limits, during sync or config reload.",
|
||||
},
|
||||
)
|
||||
targetSyncFailed = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_sync_failed_total",
|
||||
Help: "Total number of target sync failures.",
|
||||
},
|
||||
[]string{"scrape_job"},
|
||||
)
|
||||
targetScrapeNativeHistogramBucketLimit = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrapes_exceeded_native_histogram_bucket_limit_total",
|
||||
Help: "Total number of scrapes that hit the native histogram bucket limit and were rejected.",
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
func init() {
|
||||
prometheus.MustRegister(
|
||||
targetIntervalLength,
|
||||
targetReloadIntervalLength,
|
||||
targetScrapePools,
|
||||
targetScrapePoolsFailed,
|
||||
targetScrapePoolReloads,
|
||||
targetScrapePoolReloadsFailed,
|
||||
targetSyncIntervalLength,
|
||||
targetScrapePoolSyncsCounter,
|
||||
targetScrapeExceededBodySizeLimit,
|
||||
targetScrapeSampleLimit,
|
||||
targetScrapeSampleDuplicate,
|
||||
targetScrapeSampleOutOfOrder,
|
||||
targetScrapeSampleOutOfBounds,
|
||||
targetScrapePoolExceededTargetLimit,
|
||||
targetScrapePoolTargetLimit,
|
||||
targetScrapePoolTargetsAdded,
|
||||
targetScrapeCacheFlushForced,
|
||||
targetMetadataCache,
|
||||
targetScrapeExemplarOutOfOrder,
|
||||
targetScrapePoolExceededLabelLimits,
|
||||
targetSyncFailed,
|
||||
targetScrapeNativeHistogramBucketLimit,
|
||||
)
|
||||
}
|
||||
|
||||
// scrapePool manages scrapes for sets of targets.
|
||||
type scrapePool struct {
|
||||
appendable storage.Appendable
|
||||
|
@ -251,6 +84,8 @@ type scrapePool struct {
|
|||
newLoop func(scrapeLoopOptions) loop
|
||||
|
||||
noDefaultPort bool
|
||||
|
||||
metrics *scrapeMetrics
|
||||
}
|
||||
|
||||
type labelLimits struct {
|
||||
|
@ -279,15 +114,13 @@ const maxAheadTime = 10 * time.Minute
|
|||
// returning an empty label set is interpreted as "drop"
|
||||
type labelsMutator func(labels.Labels) labels.Labels
|
||||
|
||||
func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed uint64, logger log.Logger, options *Options) (*scrapePool, error) {
|
||||
targetScrapePools.Inc()
|
||||
func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed uint64, logger log.Logger, options *Options, metrics *scrapeMetrics) (*scrapePool, error) {
|
||||
if logger == nil {
|
||||
logger = log.NewNopLogger()
|
||||
}
|
||||
|
||||
client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, cfg.JobName, options.HTTPClientOptions...)
|
||||
if err != nil {
|
||||
targetScrapePoolsFailed.Inc()
|
||||
return nil, errors.Wrap(err, "error creating HTTP client")
|
||||
}
|
||||
|
||||
|
@ -302,6 +135,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed
|
|||
activeTargets: map[uint64]*Target{},
|
||||
loops: map[uint64]loop{},
|
||||
logger: logger,
|
||||
metrics: metrics,
|
||||
httpOpts: options.HTTPClientOptions,
|
||||
noDefaultPort: options.NoDefaultPort,
|
||||
}
|
||||
|
@ -309,7 +143,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed
|
|||
// Update the targets retrieval function for metadata to a new scrape cache.
|
||||
cache := opts.cache
|
||||
if cache == nil {
|
||||
cache = newScrapeCache()
|
||||
cache = newScrapeCache(metrics)
|
||||
}
|
||||
opts.target.SetMetadataStore(cache)
|
||||
|
||||
|
@ -336,9 +170,10 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed
|
|||
options.EnableMetadataStorage,
|
||||
opts.target,
|
||||
options.PassMetadataInContext,
|
||||
metrics,
|
||||
)
|
||||
}
|
||||
targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit))
|
||||
sp.metrics.targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit))
|
||||
return sp, nil
|
||||
}
|
||||
|
||||
|
@ -393,11 +228,11 @@ func (sp *scrapePool) stop() {
|
|||
sp.client.CloseIdleConnections()
|
||||
|
||||
if sp.config != nil {
|
||||
targetScrapePoolSyncsCounter.DeleteLabelValues(sp.config.JobName)
|
||||
targetScrapePoolTargetLimit.DeleteLabelValues(sp.config.JobName)
|
||||
targetScrapePoolTargetsAdded.DeleteLabelValues(sp.config.JobName)
|
||||
targetSyncIntervalLength.DeleteLabelValues(sp.config.JobName)
|
||||
targetSyncFailed.DeleteLabelValues(sp.config.JobName)
|
||||
sp.metrics.targetScrapePoolSyncsCounter.DeleteLabelValues(sp.config.JobName)
|
||||
sp.metrics.targetScrapePoolTargetLimit.DeleteLabelValues(sp.config.JobName)
|
||||
sp.metrics.targetScrapePoolTargetsAdded.DeleteLabelValues(sp.config.JobName)
|
||||
sp.metrics.targetSyncIntervalLength.DeleteLabelValues(sp.config.JobName)
|
||||
sp.metrics.targetSyncFailed.DeleteLabelValues(sp.config.JobName)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -407,12 +242,12 @@ func (sp *scrapePool) stop() {
|
|||
func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error {
|
||||
sp.mtx.Lock()
|
||||
defer sp.mtx.Unlock()
|
||||
targetScrapePoolReloads.Inc()
|
||||
sp.metrics.targetScrapePoolReloads.Inc()
|
||||
start := time.Now()
|
||||
|
||||
client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, cfg.JobName, sp.httpOpts...)
|
||||
if err != nil {
|
||||
targetScrapePoolReloadsFailed.Inc()
|
||||
sp.metrics.targetScrapePoolReloadsFailed.Inc()
|
||||
return errors.Wrap(err, "error creating HTTP client")
|
||||
}
|
||||
|
||||
|
@ -421,7 +256,7 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error {
|
|||
oldClient := sp.client
|
||||
sp.client = client
|
||||
|
||||
targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit))
|
||||
sp.metrics.targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit))
|
||||
|
||||
var (
|
||||
wg sync.WaitGroup
|
||||
|
@ -449,7 +284,7 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error {
|
|||
oldLoop.disableEndOfRunStalenessMarkers()
|
||||
cache = oc
|
||||
} else {
|
||||
cache = newScrapeCache()
|
||||
cache = newScrapeCache(sp.metrics)
|
||||
}
|
||||
|
||||
t := sp.activeTargets[fp]
|
||||
|
@ -496,7 +331,7 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error {
|
|||
|
||||
wg.Wait()
|
||||
oldClient.CloseIdleConnections()
|
||||
targetReloadIntervalLength.WithLabelValues(interval.String()).Observe(
|
||||
sp.metrics.targetReloadIntervalLength.WithLabelValues(interval.String()).Observe(
|
||||
time.Since(start).Seconds(),
|
||||
)
|
||||
return nil
|
||||
|
@ -520,7 +355,7 @@ func (sp *scrapePool) Sync(tgs []*targetgroup.Group) {
|
|||
for _, err := range failures {
|
||||
level.Error(sp.logger).Log("msg", "Creating target failed", "err", err)
|
||||
}
|
||||
targetSyncFailed.WithLabelValues(sp.config.JobName).Add(float64(len(failures)))
|
||||
sp.metrics.targetSyncFailed.WithLabelValues(sp.config.JobName).Add(float64(len(failures)))
|
||||
for _, t := range targets {
|
||||
// Replicate .Labels().IsEmpty() with a loop here to avoid generating garbage.
|
||||
nonEmpty := false
|
||||
|
@ -539,10 +374,10 @@ func (sp *scrapePool) Sync(tgs []*targetgroup.Group) {
|
|||
sp.targetMtx.Unlock()
|
||||
sp.sync(all)
|
||||
|
||||
targetSyncIntervalLength.WithLabelValues(sp.config.JobName).Observe(
|
||||
sp.metrics.targetSyncIntervalLength.WithLabelValues(sp.config.JobName).Observe(
|
||||
time.Since(start).Seconds(),
|
||||
)
|
||||
targetScrapePoolSyncsCounter.WithLabelValues(sp.config.JobName).Inc()
|
||||
sp.metrics.targetScrapePoolSyncsCounter.WithLabelValues(sp.config.JobName).Inc()
|
||||
}
|
||||
|
||||
// sync takes a list of potentially duplicated targets, deduplicates them, starts
|
||||
|
@ -583,6 +418,7 @@ func (sp *scrapePool) sync(targets []*Target) {
|
|||
timeout: timeout,
|
||||
bodySizeLimit: bodySizeLimit,
|
||||
acceptHeader: acceptHeader(sp.config.ScrapeProtocols),
|
||||
metrics: sp.metrics,
|
||||
}
|
||||
l := sp.newLoop(scrapeLoopOptions{
|
||||
target: t,
|
||||
|
@ -634,7 +470,7 @@ func (sp *scrapePool) sync(targets []*Target) {
|
|||
|
||||
sp.targetMtx.Unlock()
|
||||
|
||||
targetScrapePoolTargetsAdded.WithLabelValues(sp.config.JobName).Set(float64(len(uniqueLoops)))
|
||||
sp.metrics.targetScrapePoolTargetsAdded.WithLabelValues(sp.config.JobName).Set(float64(len(uniqueLoops)))
|
||||
forcedErr := sp.refreshTargetLimitErr()
|
||||
for _, l := range sp.loops {
|
||||
l.setForcedError(forcedErr)
|
||||
|
@ -658,7 +494,7 @@ func (sp *scrapePool) refreshTargetLimitErr() error {
|
|||
return nil
|
||||
}
|
||||
if l := len(sp.activeTargets); l > int(sp.config.TargetLimit) {
|
||||
targetScrapePoolExceededTargetLimit.Inc()
|
||||
sp.metrics.targetScrapePoolExceededTargetLimit.Inc()
|
||||
return fmt.Errorf("target_limit exceeded (number of targets: %d, limit: %d)", l, sp.config.TargetLimit)
|
||||
}
|
||||
return nil
|
||||
|
@ -806,6 +642,8 @@ type targetScraper struct {
|
|||
|
||||
bodySizeLimit int64
|
||||
acceptHeader string
|
||||
|
||||
metrics *scrapeMetrics
|
||||
}
|
||||
|
||||
var errBodySizeLimit = errors.New("body size limit exceeded")
|
||||
|
@ -863,7 +701,7 @@ func (s *targetScraper) readResponse(ctx context.Context, resp *http.Response, w
|
|||
return "", err
|
||||
}
|
||||
if n >= s.bodySizeLimit {
|
||||
targetScrapeExceededBodySizeLimit.Inc()
|
||||
s.metrics.targetScrapeExceededBodySizeLimit.Inc()
|
||||
return "", errBodySizeLimit
|
||||
}
|
||||
return resp.Header.Get("Content-Type"), nil
|
||||
|
@ -889,7 +727,7 @@ func (s *targetScraper) readResponse(ctx context.Context, resp *http.Response, w
|
|||
return "", err
|
||||
}
|
||||
if n >= s.bodySizeLimit {
|
||||
targetScrapeExceededBodySizeLimit.Inc()
|
||||
s.metrics.targetScrapeExceededBodySizeLimit.Inc()
|
||||
return "", errBodySizeLimit
|
||||
}
|
||||
return resp.Header.Get("Content-Type"), nil
|
||||
|
@ -942,6 +780,8 @@ type scrapeLoop struct {
|
|||
|
||||
reportExtraMetrics bool
|
||||
appendMetadataToWAL bool
|
||||
|
||||
metrics *scrapeMetrics
|
||||
}
|
||||
|
||||
// scrapeCache tracks mappings of exposed metric strings to label sets and
|
||||
|
@ -969,6 +809,8 @@ type scrapeCache struct {
|
|||
|
||||
metaMtx sync.Mutex
|
||||
metadata map[string]*metaEntry
|
||||
|
||||
metrics *scrapeMetrics
|
||||
}
|
||||
|
||||
// metaEntry holds meta information about a metric.
|
||||
|
@ -984,13 +826,14 @@ func (m *metaEntry) size() int {
|
|||
return len(m.Help) + len(m.Unit) + len(m.Type)
|
||||
}
|
||||
|
||||
func newScrapeCache() *scrapeCache {
|
||||
func newScrapeCache(metrics *scrapeMetrics) *scrapeCache {
|
||||
return &scrapeCache{
|
||||
series: map[string]*cacheEntry{},
|
||||
droppedSeries: map[string]*uint64{},
|
||||
seriesCur: map[uint64]labels.Labels{},
|
||||
seriesPrev: map[uint64]labels.Labels{},
|
||||
metadata: map[string]*metaEntry{},
|
||||
metrics: metrics,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1009,7 +852,7 @@ func (c *scrapeCache) iterDone(flushCache bool) {
|
|||
// since the last scrape, and allow an additional 1000 in case
|
||||
// initial scrapes all fail.
|
||||
flushCache = true
|
||||
targetScrapeCacheFlushForced.Inc()
|
||||
c.metrics.targetScrapeCacheFlushForced.Inc()
|
||||
}
|
||||
|
||||
if flushCache {
|
||||
|
@ -1213,6 +1056,7 @@ func newScrapeLoop(ctx context.Context,
|
|||
appendMetadataToWAL bool,
|
||||
target *Target,
|
||||
passMetadataInContext bool,
|
||||
metrics *scrapeMetrics,
|
||||
) *scrapeLoop {
|
||||
if l == nil {
|
||||
l = log.NewNopLogger()
|
||||
|
@ -1221,7 +1065,7 @@ func newScrapeLoop(ctx context.Context,
|
|||
buffers = pool.New(1e3, 1e6, 3, func(sz int) interface{} { return make([]byte, 0, sz) })
|
||||
}
|
||||
if cache == nil {
|
||||
cache = newScrapeCache()
|
||||
cache = newScrapeCache(metrics)
|
||||
}
|
||||
|
||||
appenderCtx := ctx
|
||||
|
@ -1256,6 +1100,7 @@ func newScrapeLoop(ctx context.Context,
|
|||
scrapeClassicHistograms: scrapeClassicHistograms,
|
||||
reportExtraMetrics: reportExtraMetrics,
|
||||
appendMetadataToWAL: appendMetadataToWAL,
|
||||
metrics: metrics,
|
||||
}
|
||||
sl.ctx, sl.cancel = context.WithCancel(ctx)
|
||||
|
||||
|
@ -1335,7 +1180,7 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er
|
|||
|
||||
// Only record after the first scrape.
|
||||
if !last.IsZero() {
|
||||
targetIntervalLength.WithLabelValues(sl.interval.String()).Observe(
|
||||
sl.metrics.targetIntervalLength.WithLabelValues(sl.interval.String()).Observe(
|
||||
time.Since(last).Seconds(),
|
||||
)
|
||||
}
|
||||
|
@ -1676,7 +1521,7 @@ loop:
|
|||
|
||||
// If any label limits is exceeded the scrape should fail.
|
||||
if err = verifyLabelLimits(lset, sl.labelLimits); err != nil {
|
||||
targetScrapePoolExceededLabelLimits.Inc()
|
||||
sl.metrics.targetScrapePoolExceededLabelLimits.Inc()
|
||||
break loop
|
||||
}
|
||||
|
||||
|
@ -1741,14 +1586,14 @@ loop:
|
|||
err = sampleLimitErr
|
||||
}
|
||||
// We only want to increment this once per scrape, so this is Inc'd outside the loop.
|
||||
targetScrapeSampleLimit.Inc()
|
||||
sl.metrics.targetScrapeSampleLimit.Inc()
|
||||
}
|
||||
if bucketLimitErr != nil {
|
||||
if err == nil {
|
||||
err = bucketLimitErr // If sample limit is hit, that error takes precedence.
|
||||
}
|
||||
// We only want to increment this once per scrape, so this is Inc'd outside the loop.
|
||||
targetScrapeNativeHistogramBucketLimit.Inc()
|
||||
sl.metrics.targetScrapeNativeHistogramBucketLimit.Inc()
|
||||
}
|
||||
if appErrs.numOutOfOrder > 0 {
|
||||
level.Warn(sl.l).Log("msg", "Error on ingesting out-of-order samples", "num_dropped", appErrs.numOutOfOrder)
|
||||
|
@ -1792,17 +1637,17 @@ func (sl *scrapeLoop) checkAddError(ce *cacheEntry, met []byte, tp *int64, err e
|
|||
case storage.ErrOutOfOrderSample:
|
||||
appErrs.numOutOfOrder++
|
||||
level.Debug(sl.l).Log("msg", "Out of order sample", "series", string(met))
|
||||
targetScrapeSampleOutOfOrder.Inc()
|
||||
sl.metrics.targetScrapeSampleOutOfOrder.Inc()
|
||||
return false, nil
|
||||
case storage.ErrDuplicateSampleForTimestamp:
|
||||
appErrs.numDuplicates++
|
||||
level.Debug(sl.l).Log("msg", "Duplicate sample for timestamp", "series", string(met))
|
||||
targetScrapeSampleDuplicate.Inc()
|
||||
sl.metrics.targetScrapeSampleDuplicate.Inc()
|
||||
return false, nil
|
||||
case storage.ErrOutOfBounds:
|
||||
appErrs.numOutOfBounds++
|
||||
level.Debug(sl.l).Log("msg", "Out of bounds metric", "series", string(met))
|
||||
targetScrapeSampleOutOfBounds.Inc()
|
||||
sl.metrics.targetScrapeSampleOutOfBounds.Inc()
|
||||
return false, nil
|
||||
case errSampleLimit:
|
||||
// Keep on parsing output if we hit the limit, so we report the correct
|
||||
|
@ -1826,7 +1671,7 @@ func (sl *scrapeLoop) checkAddExemplarError(err error, e exemplar.Exemplar, appE
|
|||
case storage.ErrOutOfOrderExemplar:
|
||||
appErrs.numExemplarOutOfOrder++
|
||||
level.Debug(sl.l).Log("msg", "Out of order exemplar", "exemplar", fmt.Sprintf("%+v", e))
|
||||
targetScrapeExemplarOutOfOrder.Inc()
|
||||
sl.metrics.targetScrapeExemplarOutOfOrder.Inc()
|
||||
return nil
|
||||
default:
|
||||
return err
|
||||
|
|
|
@ -57,11 +57,18 @@ func TestMain(m *testing.M) {
|
|||
testutil.TolerantVerifyLeak(m)
|
||||
}
|
||||
|
||||
func newTestScrapeMetrics(t testing.TB) *scrapeMetrics {
|
||||
reg := prometheus.NewRegistry()
|
||||
metrics, err := newScrapeMetrics(reg)
|
||||
require.NoError(t, err)
|
||||
return metrics
|
||||
}
|
||||
|
||||
func TestNewScrapePool(t *testing.T) {
|
||||
var (
|
||||
app = &nopAppendable{}
|
||||
cfg = &config.ScrapeConfig{}
|
||||
sp, _ = newScrapePool(cfg, app, 0, nil, &Options{})
|
||||
sp, _ = newScrapePool(cfg, app, 0, nil, &Options{}, newTestScrapeMetrics(t))
|
||||
)
|
||||
|
||||
if a, ok := sp.appendable.(*nopAppendable); !ok || a != app {
|
||||
|
@ -97,7 +104,7 @@ func TestDroppedTargetsList(t *testing.T) {
|
|||
},
|
||||
},
|
||||
}
|
||||
sp, _ = newScrapePool(cfg, app, 0, nil, &Options{})
|
||||
sp, _ = newScrapePool(cfg, app, 0, nil, &Options{}, newTestScrapeMetrics(t))
|
||||
expectedLabelSetString = "{__address__=\"127.0.0.1:9090\", __scrape_interval__=\"0s\", __scrape_timeout__=\"0s\", job=\"dropMe\"}"
|
||||
expectedLength = 2
|
||||
)
|
||||
|
@ -117,7 +124,10 @@ func TestDroppedTargetsList(t *testing.T) {
|
|||
// TestDiscoveredLabelsUpdate checks that DiscoveredLabels are updated
|
||||
// even when new labels don't affect the target `hash`.
|
||||
func TestDiscoveredLabelsUpdate(t *testing.T) {
|
||||
sp := &scrapePool{}
|
||||
sp := &scrapePool{
|
||||
metrics: newTestScrapeMetrics(t),
|
||||
}
|
||||
|
||||
// These are used when syncing so need this to avoid a panic.
|
||||
sp.config = &config.ScrapeConfig{
|
||||
ScrapeInterval: model.Duration(1),
|
||||
|
@ -184,6 +194,7 @@ func TestScrapePoolStop(t *testing.T) {
|
|||
loops: map[uint64]loop{},
|
||||
cancel: func() {},
|
||||
client: http.DefaultClient,
|
||||
metrics: newTestScrapeMetrics(t),
|
||||
}
|
||||
var mtx sync.Mutex
|
||||
stopped := map[uint64]bool{}
|
||||
|
@ -262,6 +273,7 @@ func TestScrapePoolReload(t *testing.T) {
|
|||
}
|
||||
return l
|
||||
}
|
||||
|
||||
sp := &scrapePool{
|
||||
appendable: &nopAppendable{},
|
||||
activeTargets: map[uint64]*Target{},
|
||||
|
@ -269,6 +281,7 @@ func TestScrapePoolReload(t *testing.T) {
|
|||
newLoop: newLoop,
|
||||
logger: nil,
|
||||
client: http.DefaultClient,
|
||||
metrics: newTestScrapeMetrics(t),
|
||||
}
|
||||
|
||||
// Reloading a scrape pool with a new scrape configuration must stop all scrape
|
||||
|
@ -352,6 +365,7 @@ func TestScrapePoolReloadPreserveRelabeledIntervalTimeout(t *testing.T) {
|
|||
newLoop: newLoop,
|
||||
logger: nil,
|
||||
client: http.DefaultClient,
|
||||
metrics: newTestScrapeMetrics(t),
|
||||
}
|
||||
|
||||
err := sp.reload(reloadCfg)
|
||||
|
@ -381,6 +395,7 @@ func TestScrapePoolTargetLimit(t *testing.T) {
|
|||
newLoop: newLoop,
|
||||
logger: log.NewNopLogger(),
|
||||
client: http.DefaultClient,
|
||||
metrics: newTestScrapeMetrics(t),
|
||||
}
|
||||
|
||||
tgs := []*targetgroup.Group{}
|
||||
|
@ -489,7 +504,7 @@ func TestScrapePoolTargetLimit(t *testing.T) {
|
|||
func TestScrapePoolAppender(t *testing.T) {
|
||||
cfg := &config.ScrapeConfig{}
|
||||
app := &nopAppendable{}
|
||||
sp, _ := newScrapePool(cfg, app, 0, nil, &Options{})
|
||||
sp, _ := newScrapePool(cfg, app, 0, nil, &Options{}, newTestScrapeMetrics(t))
|
||||
|
||||
loop := sp.newLoop(scrapeLoopOptions{
|
||||
target: &Target{},
|
||||
|
@ -545,7 +560,7 @@ func TestScrapePoolRaces(t *testing.T) {
|
|||
newConfig := func() *config.ScrapeConfig {
|
||||
return &config.ScrapeConfig{ScrapeInterval: interval, ScrapeTimeout: timeout}
|
||||
}
|
||||
sp, _ := newScrapePool(newConfig(), &nopAppendable{}, 0, nil, &Options{})
|
||||
sp, _ := newScrapePool(newConfig(), &nopAppendable{}, 0, nil, &Options{}, newTestScrapeMetrics(t))
|
||||
tgts := []*targetgroup.Group{
|
||||
{
|
||||
Targets: []model.LabelSet{
|
||||
|
@ -595,6 +610,7 @@ func TestScrapePoolScrapeLoopsStarted(t *testing.T) {
|
|||
newLoop: newLoop,
|
||||
logger: nil,
|
||||
client: http.DefaultClient,
|
||||
metrics: newTestScrapeMetrics(t),
|
||||
}
|
||||
|
||||
tgs := []*targetgroup.Group{
|
||||
|
@ -643,6 +659,7 @@ func TestScrapeLoopStopBeforeRun(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
// The scrape pool synchronizes on stopping scrape loops. However, new scrape
|
||||
|
@ -716,6 +733,7 @@ func TestScrapeLoopStop(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
// Terminate loop after 2 scrapes.
|
||||
|
@ -793,6 +811,7 @@ func TestScrapeLoopRun(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
// The loop must terminate during the initial offset if the context
|
||||
|
@ -849,6 +868,7 @@ func TestScrapeLoopRun(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
go func() {
|
||||
|
@ -909,6 +929,7 @@ func TestScrapeLoopForcedErr(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
forcedErr := fmt.Errorf("forced err")
|
||||
|
@ -945,7 +966,7 @@ func TestScrapeLoopMetadata(t *testing.T) {
|
|||
var (
|
||||
signal = make(chan struct{})
|
||||
scraper = &testScraper{}
|
||||
cache = newScrapeCache()
|
||||
cache = newScrapeCache(newTestScrapeMetrics(t))
|
||||
)
|
||||
defer close(signal)
|
||||
|
||||
|
@ -968,6 +989,7 @@ func TestScrapeLoopMetadata(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
defer cancel()
|
||||
|
||||
|
@ -1026,6 +1048,7 @@ func simpleTestScrapeLoop(t testing.TB) (context.Context, *scrapeLoop) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
t.Cleanup(func() { cancel() })
|
||||
|
||||
|
@ -1087,6 +1110,7 @@ func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
slApp := sl.appender(ctx)
|
||||
|
@ -1166,6 +1190,7 @@ func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrape(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
// Succeed once, several failures, then stop.
|
||||
numScrapes := 0
|
||||
|
@ -1230,6 +1255,7 @@ func TestScrapeLoopRunCreatesStaleMarkersOnParseFailure(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
// Succeed once, several failures, then stop.
|
||||
|
@ -1297,6 +1323,7 @@ func TestScrapeLoopCache(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
numScrapes := 0
|
||||
|
@ -1381,6 +1408,7 @@ func TestScrapeLoopCacheMemoryExhaustionProtection(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
numScrapes := 0
|
||||
|
@ -1496,6 +1524,7 @@ func TestScrapeLoopAppend(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
now := time.Now()
|
||||
|
@ -1583,7 +1612,8 @@ func TestScrapeLoopAppendForConflictingPrefixedLabels(t *testing.T) {
|
|||
return mutateSampleLabels(l, &Target{labels: labels.FromStrings(tc.targetLabels...)}, false, nil)
|
||||
},
|
||||
nil,
|
||||
func(ctx context.Context) storage.Appender { return app }, nil, 0, true, 0, 0, nil, 0, 0, false, false, false, nil, false,
|
||||
func(ctx context.Context) storage.Appender { return app },
|
||||
nil, 0, true, 0, 0, nil, 0, 0, false, false, false, nil, false, newTestScrapeMetrics(t),
|
||||
)
|
||||
slApp := sl.appender(context.Background())
|
||||
_, _, _, err := sl.append(slApp, []byte(tc.exposedLabels), "", time.Date(2000, 1, 1, 1, 0, 0, 0, time.UTC))
|
||||
|
@ -1623,6 +1653,7 @@ func TestScrapeLoopAppendCacheEntryButErrNotFound(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
fakeRef := storage.SeriesRef(1)
|
||||
|
@ -1682,11 +1713,12 @@ func TestScrapeLoopAppendSampleLimit(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
// Get the value of the Counter before performing the append.
|
||||
beforeMetric := dto.Metric{}
|
||||
err := targetScrapeSampleLimit.Write(&beforeMetric)
|
||||
err := sl.metrics.targetScrapeSampleLimit.Write(&beforeMetric)
|
||||
require.NoError(t, err)
|
||||
|
||||
beforeMetricValue := beforeMetric.GetCounter().GetValue()
|
||||
|
@ -1705,7 +1737,7 @@ func TestScrapeLoopAppendSampleLimit(t *testing.T) {
|
|||
// Check that the Counter has been incremented a single time for the scrape,
|
||||
// not multiple times for each sample.
|
||||
metric := dto.Metric{}
|
||||
err = targetScrapeSampleLimit.Write(&metric)
|
||||
err = sl.metrics.targetScrapeSampleLimit.Write(&metric)
|
||||
require.NoError(t, err)
|
||||
|
||||
value := metric.GetCounter().GetValue()
|
||||
|
@ -1760,10 +1792,11 @@ func TestScrapeLoop_HistogramBucketLimit(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
metric := dto.Metric{}
|
||||
err := targetScrapeNativeHistogramBucketLimit.Write(&metric)
|
||||
err := sl.metrics.targetScrapeNativeHistogramBucketLimit.Write(&metric)
|
||||
require.NoError(t, err)
|
||||
beforeMetricValue := metric.GetCounter().GetValue()
|
||||
|
||||
|
@ -1801,7 +1834,7 @@ func TestScrapeLoop_HistogramBucketLimit(t *testing.T) {
|
|||
require.Equal(t, 3, added)
|
||||
require.Equal(t, 3, seriesAdded)
|
||||
|
||||
err = targetScrapeNativeHistogramBucketLimit.Write(&metric)
|
||||
err = sl.metrics.targetScrapeNativeHistogramBucketLimit.Write(&metric)
|
||||
require.NoError(t, err)
|
||||
metricValue := metric.GetCounter().GetValue()
|
||||
require.Equal(t, beforeMetricValue, metricValue)
|
||||
|
@ -1827,7 +1860,7 @@ func TestScrapeLoop_HistogramBucketLimit(t *testing.T) {
|
|||
require.Equal(t, 3, added)
|
||||
require.Equal(t, 0, seriesAdded)
|
||||
|
||||
err = targetScrapeNativeHistogramBucketLimit.Write(&metric)
|
||||
err = sl.metrics.targetScrapeNativeHistogramBucketLimit.Write(&metric)
|
||||
require.NoError(t, err)
|
||||
metricValue = metric.GetCounter().GetValue()
|
||||
require.Equal(t, beforeMetricValue+1, metricValue)
|
||||
|
@ -1859,6 +1892,7 @@ func TestScrapeLoop_ChangingMetricString(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
now := time.Now()
|
||||
|
@ -1908,6 +1942,7 @@ func TestScrapeLoopAppendStaleness(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
now := time.Now()
|
||||
|
@ -1960,6 +1995,7 @@ func TestScrapeLoopAppendNoStalenessIfTimestamp(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
now := time.Now()
|
||||
|
@ -2286,6 +2322,7 @@ metric: <
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
now := time.Now()
|
||||
|
@ -2374,6 +2411,7 @@ func TestScrapeLoopAppendExemplarSeries(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
now := time.Now()
|
||||
|
@ -2427,6 +2465,7 @@ func TestScrapeLoopRunReportsTargetDownOnScrapeError(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error {
|
||||
|
@ -2464,6 +2503,7 @@ func TestScrapeLoopRunReportsTargetDownOnInvalidUTF8(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error {
|
||||
|
@ -2514,6 +2554,7 @@ func TestScrapeLoopAppendGracefullyIfAmendOrOutOfOrderOrOutOfBounds(t *testing.T
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
now := time.Unix(1, 0)
|
||||
|
@ -2560,6 +2601,7 @@ func TestScrapeLoopOutOfBoundsTimeError(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
now := time.Now().Add(20 * time.Minute)
|
||||
|
@ -2755,6 +2797,7 @@ func TestTargetScraperBodySizeLimit(t *testing.T) {
|
|||
client: http.DefaultClient,
|
||||
bodySizeLimit: bodySizeLimit,
|
||||
acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols),
|
||||
metrics: newTestScrapeMetrics(t),
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
|
||||
|
@ -2849,6 +2892,7 @@ func TestScrapeLoop_RespectTimestamps(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
now := time.Now()
|
||||
|
@ -2891,6 +2935,7 @@ func TestScrapeLoop_DiscardTimestamps(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
now := time.Now()
|
||||
|
@ -2932,6 +2977,7 @@ func TestScrapeLoopDiscardDuplicateLabels(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
defer cancel()
|
||||
|
||||
|
@ -2991,6 +3037,7 @@ func TestScrapeLoopDiscardUnnamedMetrics(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
defer cancel()
|
||||
|
||||
|
@ -3083,7 +3130,7 @@ func TestReuseScrapeCache(t *testing.T) {
|
|||
ScrapeInterval: model.Duration(5 * time.Second),
|
||||
MetricsPath: "/metrics",
|
||||
}
|
||||
sp, _ = newScrapePool(cfg, app, 0, nil, &Options{})
|
||||
sp, _ = newScrapePool(cfg, app, 0, nil, &Options{}, newTestScrapeMetrics(t))
|
||||
t1 = &Target{
|
||||
discoveredLabels: labels.FromStrings("labelNew", "nameNew", "labelNew1", "nameNew1", "labelNew2", "nameNew2"),
|
||||
}
|
||||
|
@ -3255,6 +3302,7 @@ func TestScrapeAddFast(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
defer cancel()
|
||||
|
||||
|
@ -3275,7 +3323,7 @@ func TestScrapeAddFast(t *testing.T) {
|
|||
require.NoError(t, slApp.Commit())
|
||||
}
|
||||
|
||||
func TestReuseCacheRace(*testing.T) {
|
||||
func TestReuseCacheRace(t *testing.T) {
|
||||
var (
|
||||
app = &nopAppendable{}
|
||||
cfg = &config.ScrapeConfig{
|
||||
|
@ -3284,7 +3332,7 @@ func TestReuseCacheRace(*testing.T) {
|
|||
ScrapeInterval: model.Duration(5 * time.Second),
|
||||
MetricsPath: "/metrics",
|
||||
}
|
||||
sp, _ = newScrapePool(cfg, app, 0, nil, &Options{})
|
||||
sp, _ = newScrapePool(cfg, app, 0, nil, &Options{}, newTestScrapeMetrics(t))
|
||||
t1 = &Target{
|
||||
discoveredLabels: labels.FromStrings("labelNew", "nameNew"),
|
||||
}
|
||||
|
@ -3309,7 +3357,7 @@ func TestReuseCacheRace(*testing.T) {
|
|||
|
||||
func TestCheckAddError(t *testing.T) {
|
||||
var appErrs appendErrors
|
||||
sl := scrapeLoop{l: log.NewNopLogger()}
|
||||
sl := scrapeLoop{l: log.NewNopLogger(), metrics: newTestScrapeMetrics(t)}
|
||||
sl.checkAddError(nil, nil, nil, storage.ErrOutOfOrderSample, nil, nil, &appErrs)
|
||||
require.Equal(t, 1, appErrs.numOutOfOrder)
|
||||
}
|
||||
|
@ -3342,6 +3390,7 @@ func TestScrapeReportSingleAppender(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
numScrapes := 0
|
||||
|
@ -3412,7 +3461,7 @@ func TestScrapeReportLimit(t *testing.T) {
|
|||
}))
|
||||
defer ts.Close()
|
||||
|
||||
sp, err := newScrapePool(cfg, s, 0, nil, &Options{})
|
||||
sp, err := newScrapePool(cfg, s, 0, nil, &Options{}, newTestScrapeMetrics(t))
|
||||
require.NoError(t, err)
|
||||
defer sp.stop()
|
||||
|
||||
|
@ -3545,6 +3594,7 @@ func TestScrapeLoopLabelLimit(t *testing.T) {
|
|||
false,
|
||||
nil,
|
||||
false,
|
||||
newTestScrapeMetrics(t),
|
||||
)
|
||||
|
||||
slApp := sl.appender(context.Background())
|
||||
|
@ -3583,7 +3633,7 @@ func TestTargetScrapeIntervalAndTimeoutRelabel(t *testing.T) {
|
|||
},
|
||||
},
|
||||
}
|
||||
sp, _ := newScrapePool(config, &nopAppendable{}, 0, nil, &Options{})
|
||||
sp, _ := newScrapePool(config, &nopAppendable{}, 0, nil, &Options{}, newTestScrapeMetrics(t))
|
||||
tgts := []*targetgroup.Group{
|
||||
{
|
||||
Targets: []model.LabelSet{{model.AddressLabel: "127.0.0.1:9090"}},
|
||||
|
|
|
@ -298,6 +298,7 @@ type dbMetrics struct {
|
|||
tombCleanTimer prometheus.Histogram
|
||||
blocksBytes prometheus.Gauge
|
||||
maxBytes prometheus.Gauge
|
||||
retentionDuration prometheus.Gauge
|
||||
}
|
||||
|
||||
func newDBMetrics(db *DB, r prometheus.Registerer) *dbMetrics {
|
||||
|
@ -371,6 +372,10 @@ func newDBMetrics(db *DB, r prometheus.Registerer) *dbMetrics {
|
|||
Name: "prometheus_tsdb_retention_limit_bytes",
|
||||
Help: "Max number of bytes to be retained in the tsdb blocks, configured 0 means disabled",
|
||||
})
|
||||
m.retentionDuration = prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "prometheus_tsdb_retention_limit_seconds",
|
||||
Help: "How long to retain samples in storage.",
|
||||
})
|
||||
m.sizeRetentionCount = prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: "prometheus_tsdb_size_retentions_total",
|
||||
Help: "The number of times that blocks were deleted because the maximum number of bytes was exceeded.",
|
||||
|
@ -391,6 +396,7 @@ func newDBMetrics(db *DB, r prometheus.Registerer) *dbMetrics {
|
|||
m.tombCleanTimer,
|
||||
m.blocksBytes,
|
||||
m.maxBytes,
|
||||
m.retentionDuration,
|
||||
)
|
||||
}
|
||||
return m
|
||||
|
@ -936,6 +942,7 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs
|
|||
maxBytes = 0
|
||||
}
|
||||
db.metrics.maxBytes.Set(float64(maxBytes))
|
||||
db.metrics.retentionDuration.Set((time.Duration(opts.RetentionDuration) * time.Millisecond).Seconds())
|
||||
|
||||
if err := db.reload(); err != nil {
|
||||
return nil, err
|
||||
|
|
|
@ -1501,6 +1501,19 @@ func TestTimeRetention(t *testing.T) {
|
|||
require.Equal(t, expBlocks[len(expBlocks)-1].MaxTime, actBlocks[len(actBlocks)-1].meta.MaxTime)
|
||||
}
|
||||
|
||||
func TestRetentionDurationMetric(t *testing.T) {
|
||||
db := openTestDB(t, &Options{
|
||||
RetentionDuration: 1000,
|
||||
}, []int64{100})
|
||||
defer func() {
|
||||
require.NoError(t, db.Close())
|
||||
}()
|
||||
|
||||
expRetentionDuration := 1.0
|
||||
actRetentionDuration := prom_testutil.ToFloat64(db.metrics.retentionDuration)
|
||||
require.Equal(t, expRetentionDuration, actRetentionDuration, "metric retention duration mismatch")
|
||||
}
|
||||
|
||||
func TestSizeRetention(t *testing.T) {
|
||||
opts := DefaultOptions()
|
||||
opts.OutOfOrderTimeWindow = 100
|
||||
|
|
|
@ -304,6 +304,10 @@ func (d *Decoder) Samples(rec []byte, samples []RefSample) ([]RefSample, error)
|
|||
baseRef = dec.Be64()
|
||||
baseTime = dec.Be64int64()
|
||||
)
|
||||
// Allow 1 byte for each varint and 8 for the value; the output slice must be at least that big.
|
||||
if minSize := dec.Len() / (1 + 1 + 8); cap(samples) < minSize {
|
||||
samples = make([]RefSample, 0, minSize)
|
||||
}
|
||||
for len(dec.B) > 0 && dec.Err() == nil {
|
||||
dref := dec.Varint64()
|
||||
dtime := dec.Varint64()
|
||||
|
|
|
@ -116,6 +116,9 @@ type annoErr struct {
|
|||
}
|
||||
|
||||
func (e annoErr) Error() string {
|
||||
if e.Query == "" {
|
||||
return e.Err.Error()
|
||||
}
|
||||
return fmt.Sprintf("%s (%s)", e.Err, e.PositionRange.StartPosInput(e.Query, 0))
|
||||
}
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@ import * as React from 'react';
|
|||
import { shallow } from 'enzyme';
|
||||
import { WALReplayData } from '../types/types';
|
||||
import { StartingContent } from './withStartingIndicator';
|
||||
import { Progress } from 'reactstrap';
|
||||
import { Alert, Progress } from 'reactstrap';
|
||||
|
||||
describe('Starting', () => {
|
||||
describe('progress bar', () => {
|
||||
|
@ -52,5 +52,17 @@ describe('Starting', () => {
|
|||
expect(progress.prop('value')).toBe(21);
|
||||
expect(progress.prop('color')).toBe('success');
|
||||
});
|
||||
|
||||
it('shows unexpected error', () => {
  // With isUnexpected set, StartingContent should render a "danger" Alert.
  const status: WALReplayData = { min: 0, max: 20, current: 0 };
  const wrapper = shallow(<StartingContent status={status} isUnexpected={true} />);

  expect(wrapper.find(Alert).prop('color')).toBe('danger');
});
|
||||
});
|
||||
});
|
||||
|
|
|
@ -51,7 +51,7 @@ export const withStartingIndicator =
|
|||
const { ready, walReplayStatus, isUnexpected } = useFetchReadyInterval(pathPrefix);
|
||||
const staticReady = useReady();
|
||||
|
||||
if (staticReady || ready || isUnexpected) {
|
||||
if (staticReady || ready) {
|
||||
return <Page {...(rest as T)} />;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue