Merge remote-tracking branch 'upstream/main' into sync-upstream-20231026

This commit is contained in:
Jeanette Tan 2023-10-26 22:18:24 +08:00
commit 6341ba7374
33 changed files with 2037 additions and 1339 deletions

View file

@ -18,7 +18,6 @@ build:
windows:
- builtinassets
- stringlabels
flags: -a
ldflags: |
-X github.com/prometheus/common/version.Version={{.Version}}
-X github.com/prometheus/common/version.Revision={{.Revision}}

View file

@ -202,9 +202,10 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
level.Info(logger).Log("msg", "No default port will be appended to scrape targets' addresses.")
case "native-histograms":
c.tsdb.EnableNativeHistograms = true
// Change global variable. Hacky, but it's hard to pass new option or default to unmarshaller.
// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers.
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultNativeHistogramScrapeProtocols
level.Info(logger).Log("msg", "Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultConfig.GlobalConfig.ScrapeProtocols))
config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultNativeHistogramScrapeProtocols
level.Info(logger).Log("msg", "Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
case "":
continue
case "promql-at-modifier", "promql-negative-offset":
@ -620,8 +621,18 @@ func main() {
discoveryManagerNotify = legacymanager.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), legacymanager.Name("notify"))
}
scrapeManager, err := scrape.NewManager(
&cfg.scrape,
log.With(logger, "component", "scrape manager"),
fanoutStorage,
prometheus.DefaultRegisterer,
)
if err != nil {
level.Error(logger).Log("msg", "failed to create a scrape manager", "err", err)
os.Exit(1)
}
var (
scrapeManager = scrape.NewManager(&cfg.scrape, log.With(logger, "component", "scrape manager"), fanoutStorage)
tracingManager = tracing.NewManager(logger)
queryEngine *promql.Engine

View file

@ -0,0 +1,15 @@
tests:
- input_series:
- series: test
values: 0 1
promql_expr_test:
- expr: test
eval_time: 59s
exp_samples:
- value: 0
labels: test
- expr: test
eval_time: 1m
exp_samples:
- value: 1
labels: test

View file

@ -96,6 +96,9 @@ func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts) []error {
// Testing.
var errs []error
for _, t := range unitTestInp.Tests {
if t.Interval == 0 {
t.Interval = unitTestInp.EvaluationInterval
}
ers := t.test(evalInterval, groupOrderMap, queryOpts, unitTestInp.RuleFiles...)
if ers != nil {
errs = append(errs, ers...)

View file

@ -112,6 +112,16 @@ func TestRulesUnitTest(t *testing.T) {
},
want: 0,
},
{
name: "No test group interval",
args: args{
files: []string{"./testdata/no-test-group-interval.yml"},
},
queryOpts: promql.LazyLoaderOpts{
EnableNegativeOffset: true,
},
want: 0,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {

View file

@ -39,7 +39,7 @@ tests:
``` yaml
# Series data
interval: <duration>
[ interval: <duration> | default = evaluation_interval ]
input_series:
[ - <series> ]

4
go.mod
View file

@ -49,7 +49,7 @@ require (
github.com/prometheus/alertmanager v0.26.0
github.com/prometheus/client_golang v1.17.0
github.com/prometheus/client_model v0.5.0
github.com/prometheus/common v0.44.0
github.com/prometheus/common v0.45.0
github.com/prometheus/common/assets v0.2.0
github.com/prometheus/common/sigv4 v0.1.0
github.com/prometheus/exporter-toolkit v0.10.0
@ -165,7 +165,7 @@ require (
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/moby/term v0.0.0-20210619224110-3f7ff695adc6 // indirect

8
go.sum
View file

@ -534,8 +534,8 @@ github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APP
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=
github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 h1:jWpvCLoY8Z/e3VKvlsiIGKtc+UG6U5vzxaoagmhXfyg=
github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0/go.mod h1:QUyp042oQthUoa9bqDv0ER0wrtXnBruoNd7aNjkbP+k=
github.com/maxatome/go-testdeep v1.12.0 h1:Ql7Go8Tg0C1D/uMMX59LAoYK7LffeJQ6X2T04nTH68g=
github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg=
github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso=
@ -658,8 +658,8 @@ github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8b
github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc=
github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls=
github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY=
github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY=
github.com/prometheus/common v0.45.0 h1:2BGz0eBc2hdMDLnO/8n0jeB3oPrt2D08CekT0lneoxM=
github.com/prometheus/common v0.45.0/go.mod h1:YJmSTw9BoKxJplESWWxlbyttQR4uaEcGyv9MZjVOJsY=
github.com/prometheus/common/assets v0.2.0 h1:0P5OrzoHrYBOSM1OigWL3mY8ZvV2N4zIE/5AahrSrfM=
github.com/prometheus/common/assets v0.2.0/go.mod h1:D17UVUE12bHbim7HzwUvtqm6gwBEaDQ0F+hIGbFbccI=
github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4=

View file

@ -338,6 +338,34 @@ func (h *FloatHistogram) Equals(h2 *FloatHistogram) bool {
return true
}
// Size reports the number of bytes occupied by the FloatHistogram: the pointer
// to it, every field of the struct, and all elements currently held in its
// span and bucket slices (by length, not capacity).
// NOTE: this is only valid for 64 bit architectures.
func (fh *FloatHistogram) Size() int {
	const (
		// Fixed footprint, independent of the slice contents:
		//   pointer to the struct           8 bytes
		//   CounterResetHint                4 bytes (1 byte + 3 bytes padding)
		//   Schema                          4 bytes
		//   ZeroThreshold, ZeroCount,
		//   Count, Sum                  4 * 8 bytes
		//   PositiveSpans, NegativeSpans,
		//   PositiveBuckets,
		//   NegativeBuckets            4 * 24 bytes (slice headers)
		fixedBytes = 144
		// One span is an int32 plus a uint32.
		bytesPerSpan = 8
		// One bucket is a float64.
		bytesPerBucket = 8
	)

	spanCount := len(fh.PositiveSpans) + len(fh.NegativeSpans)
	bucketCount := len(fh.PositiveBuckets) + len(fh.NegativeBuckets)

	return fixedBytes + spanCount*bytesPerSpan + bucketCount*bytesPerBucket
}
// Compact eliminates empty buckets at the beginning and end of each span, then
// merges spans that are consecutive or at most maxEmptyBuckets apart, and
// finally splits spans that contain more consecutive empty buckets than

View file

@ -2341,3 +2341,55 @@ func TestFloatHistogramEquals(t *testing.T) {
notEquals(h1, *hNegBucketNaN)
equals(*hNegBucketNaN, *hNegBucketNaN)
}
// TestFloatHistogramSize verifies that FloatHistogram.Size reports the fixed
// struct footprint for a histogram with nil slices, and that it adds the
// per-element cost of every span and bucket for a fully populated one.
func TestFloatHistogramSize(t *testing.T) {
	// Footprint of a histogram with no spans and no buckets:
	// pointer (8) + CounterResetHint incl. padding (4) + Schema (4) +
	// four float64 fields (4*8) + four slice headers (4*24).
	const emptySize = 8 + 4 + 4 + 4*8 + 4*24

	tests := []struct {
		name string
		hist *FloatHistogram
		want int
	}{
		{
			name: "without spans and buckets",
			hist: &FloatHistogram{
				CounterResetHint: 0,
				Schema:           1,
				ZeroThreshold:    0.01,
				ZeroCount:        5.5,
				Count:            3493.3,
				Sum:              2349209.324,
				PositiveSpans:    nil,
				PositiveBuckets:  nil,
				NegativeSpans:    nil,
				NegativeBuckets:  nil,
			},
			want: emptySize,
		},
		{
			name: "complete struct",
			hist: &FloatHistogram{
				CounterResetHint: 0,
				Schema:           1,
				ZeroThreshold:    0.01,
				ZeroCount:        5.5,
				Count:            3493.3,
				Sum:              2349209.324,
				// Two spans of 2 * 4 bytes each.
				PositiveSpans: []Span{
					{-2, 1},
					{2, 3},
				},
				// Four buckets of 8 bytes each.
				PositiveBuckets: []float64{1, 3.3, 4.2, 0.1},
				// Two spans of 2 * 4 bytes each.
				NegativeSpans: []Span{
					{3, 2},
					{3, 2},
				},
				// Four buckets of 8 bytes each.
				NegativeBuckets: []float64{3.1, 3, 1.234e5, 1000},
			},
			want: emptySize + 2*(2*4+2*4) + 2*(4*8),
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			require.Equal(t, tc.want, tc.hist.Size())
		})
	}
}

View file

@ -16,6 +16,8 @@ package textparse
import (
"mime"
"github.com/gogo/protobuf/types"
"github.com/prometheus/prometheus/model/exemplar"
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels"
@ -64,6 +66,11 @@ type Parser interface {
// retrieved (including the case where no exemplars exist at all).
Exemplar(l *exemplar.Exemplar) bool
// CreatedTimestamp writes the created timestamp of the current sample
// into the passed timestamp. It returns false if no created timestamp
// exists or if the metric type does not support created timestamps.
CreatedTimestamp(ct *types.Timestamp) bool
// Next advances the parser to the next sample. It returns false if no
// more samples were read or an error occurred.
Next() (Entry, error)

View file

@ -24,6 +24,8 @@ import (
"strings"
"unicode/utf8"
"github.com/gogo/protobuf/types"
"github.com/prometheus/prometheus/model/exemplar"
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels"
@ -211,6 +213,11 @@ func (p *OpenMetricsParser) Exemplar(e *exemplar.Exemplar) bool {
return true
}
// CreatedTimestamp always returns false because OpenMetricsParser does not
// support created timestamps (yet). The passed timestamp is ignored and never
// written to.
func (p *OpenMetricsParser) CreatedTimestamp(_ *types.Timestamp) bool {
return false
}
// nextToken returns the next token from the openMetricsLexer.
func (p *OpenMetricsParser) nextToken() token {
tok := p.l.Lex()

View file

@ -26,6 +26,8 @@ import (
"unicode/utf8"
"unsafe"
"github.com/gogo/protobuf/types"
"github.com/prometheus/prometheus/model/exemplar"
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels"
@ -245,6 +247,11 @@ func (p *PromParser) Exemplar(*exemplar.Exemplar) bool {
return false
}
// CreatedTimestamp always returns false because PromParser does not support
// created timestamps. The passed timestamp is ignored and never written to.
func (p *PromParser) CreatedTimestamp(_ *types.Timestamp) bool {
return false
}
// nextToken returns the next token from the promlexer. It skips over tabs
// and spaces.
func (p *PromParser) nextToken() token {

View file

@ -23,6 +23,7 @@ import (
"unicode/utf8"
"github.com/gogo/protobuf/proto"
"github.com/gogo/protobuf/types"
"github.com/pkg/errors"
"github.com/prometheus/common/model"
@ -147,9 +148,15 @@ func (p *ProtobufParser) Series() ([]byte, *int64, float64) {
if ts != 0 {
return p.metricBytes.Bytes(), &ts, v
}
// Nasty hack: Assume that ts==0 means no timestamp. That's not true in
// general, but proto3 has no distinction between unset and
// default. Need to avoid in the final format.
// TODO(beorn7): We assume here that ts==0 means no timestamp. That's
// not true in general, but proto3 originally had no distinction between
// unset and default. At a later stage, the `optional` keyword was
// (re-)introduced in proto3, but gogo-protobuf never got updated to
// support it. (Note that setting `[(gogoproto.nullable) = true]` for
// the `timestamp_ms` field doesn't help, either.) We plan to migrate
// away from gogo-protobuf to an actively maintained protobuf
// implementation. Once that's done, we can simply use the `optional`
// keyword and check for the unset state explicitly.
return p.metricBytes.Bytes(), nil, v
}
@ -347,6 +354,24 @@ func (p *ProtobufParser) Exemplar(ex *exemplar.Exemplar) bool {
return true
}
// CreatedTimestamp copies the created timestamp of the metric at the current
// position into ct and returns true. It returns false, leaving ct untouched,
// if the metric family is not a counter, summary, histogram, or gauge
// histogram, or if the metric carries no created timestamp.
func (p *ProtobufParser) CreatedTimestamp(ct *types.Timestamp) bool {
	var created *types.Timestamp

	switch p.mf.GetType() {
	case dto.MetricType_COUNTER:
		created = p.mf.GetMetric()[p.metricPos].GetCounter().GetCreatedTimestamp()
	case dto.MetricType_SUMMARY:
		created = p.mf.GetMetric()[p.metricPos].GetSummary().GetCreatedTimestamp()
	case dto.MetricType_HISTOGRAM, dto.MetricType_GAUGE_HISTOGRAM:
		created = p.mf.GetMetric()[p.metricPos].GetHistogram().GetCreatedTimestamp()
	default:
		// No other metric type is consulted for a created timestamp.
		return false
	}

	if created != nil {
		*ct = *created
		return true
	}
	return false
}
// Next advances the parser to the next "sample" (emulating the behavior of a
// text format parser). It returns (EntryInvalid, io.EOF) if no samples were
// read.

View file

@ -21,6 +21,7 @@ import (
"testing"
"github.com/gogo/protobuf/proto"
"github.com/gogo/protobuf/types"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/exemplar"
@ -530,6 +531,69 @@ metric: <
>
>
`,
`name: "test_counter_with_createdtimestamp"
help: "A counter with a created timestamp."
type: COUNTER
metric: <
counter: <
value: 42
created_timestamp: <
seconds: 1
nanos: 1
>
>
>
`,
`name: "test_summary_with_createdtimestamp"
help: "A summary with a created timestamp."
type: SUMMARY
metric: <
summary: <
sample_count: 42
sample_sum: 1.234
created_timestamp: <
seconds: 1
nanos: 1
>
>
>
`,
`name: "test_histogram_with_createdtimestamp"
help: "A histogram with a created timestamp."
type: HISTOGRAM
metric: <
histogram: <
created_timestamp: <
seconds: 1
nanos: 1
>
positive_span: <
offset: 0
length: 0
>
>
>
`,
`name: "test_gaugehistogram_with_createdtimestamp"
help: "A gauge histogram with a created timestamp."
type: GAUGE_HISTOGRAM
metric: <
histogram: <
created_timestamp: <
seconds: 1
nanos: 1
>
positive_span: <
offset: 0
length: 0
>
>
>
`,
}
@ -566,6 +630,7 @@ func TestProtobufParse(t *testing.T) {
shs *histogram.Histogram
fhs *histogram.FloatHistogram
e []exemplar.Exemplar
ct *types.Timestamp
}
inputBuf := createTestProtoBuf(t)
@ -997,6 +1062,86 @@ func TestProtobufParse(t *testing.T) {
"__name__", "empty_histogram",
),
},
{
m: "test_counter_with_createdtimestamp",
help: "A counter with a created timestamp.",
},
{
m: "test_counter_with_createdtimestamp",
typ: MetricTypeCounter,
},
{
m: "test_counter_with_createdtimestamp",
v: 42,
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
lset: labels.FromStrings(
"__name__", "test_counter_with_createdtimestamp",
),
},
{
m: "test_summary_with_createdtimestamp",
help: "A summary with a created timestamp.",
},
{
m: "test_summary_with_createdtimestamp",
typ: MetricTypeSummary,
},
{
m: "test_summary_with_createdtimestamp_count",
v: 42,
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
lset: labels.FromStrings(
"__name__", "test_summary_with_createdtimestamp_count",
),
},
{
m: "test_summary_with_createdtimestamp_sum",
v: 1.234,
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
lset: labels.FromStrings(
"__name__", "test_summary_with_createdtimestamp_sum",
),
},
{
m: "test_histogram_with_createdtimestamp",
help: "A histogram with a created timestamp.",
},
{
m: "test_histogram_with_createdtimestamp",
typ: MetricTypeHistogram,
},
{
m: "test_histogram_with_createdtimestamp",
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
shs: &histogram.Histogram{
CounterResetHint: histogram.UnknownCounterReset,
PositiveSpans: []histogram.Span{},
NegativeSpans: []histogram.Span{},
},
lset: labels.FromStrings(
"__name__", "test_histogram_with_createdtimestamp",
),
},
{
m: "test_gaugehistogram_with_createdtimestamp",
help: "A gauge histogram with a created timestamp.",
},
{
m: "test_gaugehistogram_with_createdtimestamp",
typ: MetricTypeGaugeHistogram,
},
{
m: "test_gaugehistogram_with_createdtimestamp",
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
shs: &histogram.Histogram{
CounterResetHint: histogram.GaugeType,
PositiveSpans: []histogram.Span{},
NegativeSpans: []histogram.Span{},
},
lset: labels.FromStrings(
"__name__", "test_gaugehistogram_with_createdtimestamp",
),
},
},
},
{
@ -1739,6 +1884,86 @@ func TestProtobufParse(t *testing.T) {
"__name__", "empty_histogram",
),
},
{ // 81
m: "test_counter_with_createdtimestamp",
help: "A counter with a created timestamp.",
},
{ // 82
m: "test_counter_with_createdtimestamp",
typ: MetricTypeCounter,
},
{ // 83
m: "test_counter_with_createdtimestamp",
v: 42,
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
lset: labels.FromStrings(
"__name__", "test_counter_with_createdtimestamp",
),
},
{ // 84
m: "test_summary_with_createdtimestamp",
help: "A summary with a created timestamp.",
},
{ // 85
m: "test_summary_with_createdtimestamp",
typ: MetricTypeSummary,
},
{ // 86
m: "test_summary_with_createdtimestamp_count",
v: 42,
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
lset: labels.FromStrings(
"__name__", "test_summary_with_createdtimestamp_count",
),
},
{ // 87
m: "test_summary_with_createdtimestamp_sum",
v: 1.234,
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
lset: labels.FromStrings(
"__name__", "test_summary_with_createdtimestamp_sum",
),
},
{ // 88
m: "test_histogram_with_createdtimestamp",
help: "A histogram with a created timestamp.",
},
{ // 89
m: "test_histogram_with_createdtimestamp",
typ: MetricTypeHistogram,
},
{ // 90
m: "test_histogram_with_createdtimestamp",
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
shs: &histogram.Histogram{
CounterResetHint: histogram.UnknownCounterReset,
PositiveSpans: []histogram.Span{},
NegativeSpans: []histogram.Span{},
},
lset: labels.FromStrings(
"__name__", "test_histogram_with_createdtimestamp",
),
},
{ // 91
m: "test_gaugehistogram_with_createdtimestamp",
help: "A gauge histogram with a created timestamp.",
},
{ // 92
m: "test_gaugehistogram_with_createdtimestamp",
typ: MetricTypeGaugeHistogram,
},
{ // 93
m: "test_gaugehistogram_with_createdtimestamp",
ct: &types.Timestamp{Seconds: 1, Nanos: 1},
shs: &histogram.Histogram{
CounterResetHint: histogram.GaugeType,
PositiveSpans: []histogram.Span{},
NegativeSpans: []histogram.Span{},
},
lset: labels.FromStrings(
"__name__", "test_gaugehistogram_with_createdtimestamp",
),
},
},
},
}
@ -1764,8 +1989,10 @@ func TestProtobufParse(t *testing.T) {
m, ts, v := p.Series()
var e exemplar.Exemplar
var ct types.Timestamp
p.Metric(&res)
found := p.Exemplar(&e)
eFound := p.Exemplar(&e)
ctFound := p.CreatedTimestamp(&ct)
require.Equal(t, exp[i].m, string(m), "i: %d", i)
if ts != nil {
require.Equal(t, exp[i].t, *ts, "i: %d", i)
@ -1775,12 +2002,18 @@ func TestProtobufParse(t *testing.T) {
require.Equal(t, exp[i].v, v, "i: %d", i)
require.Equal(t, exp[i].lset, res, "i: %d", i)
if len(exp[i].e) == 0 {
require.Equal(t, false, found, "i: %d", i)
require.Equal(t, false, eFound, "i: %d", i)
} else {
require.Equal(t, true, found, "i: %d", i)
require.Equal(t, true, eFound, "i: %d", i)
require.Equal(t, exp[i].e[0], e, "i: %d", i)
require.False(t, p.Exemplar(&e), "too many exemplars returned, i: %d", i)
}
if exp[i].ct != nil {
require.Equal(t, true, ctFound, "i: %d", i)
require.Equal(t, exp[i].ct.String(), ct.String(), "i: %d", i)
} else {
require.Equal(t, false, ctFound, "i: %d", i)
}
case EntryHistogram:
m, ts, shs, fhs := p.Histogram()

View file

@ -965,68 +965,67 @@ func init() {
}
var fileDescriptor_d1e5ddb18987a258 = []byte{
// 963 bytes of a gzipped FileDescriptorProto
// 960 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xa4, 0x56, 0xdd, 0x6e, 0x1b, 0x45,
0x14, 0xee, 0x76, 0xfd, 0x93, 0x3d, 0x8e, 0x93, 0xcd, 0x60, 0x55, 0xab, 0x40, 0x62, 0xb3, 0x12,
0x52, 0x40, 0xc8, 0x16, 0x50, 0x04, 0x2a, 0x45, 0x22, 0x69, 0xd3, 0x14, 0x15, 0xb7, 0x65, 0x6c,
0x5f, 0x94, 0x9b, 0xd5, 0xd8, 0x9e, 0xac, 0x57, 0xec, 0xee, 0x2c, 0xfb, 0x53, 0x11, 0xee, 0x79,
0x06, 0x5e, 0x01, 0xf1, 0x1c, 0x08, 0xf5, 0x92, 0x07, 0x40, 0x08, 0xe5, 0x49, 0xd0, 0xfc, 0xed,
0x3a, 0xd5, 0xba, 0x90, 0xf6, 0x6e, 0xe6, 0xf3, 0x77, 0xce, 0x7c, 0xe7, 0x9b, 0xf1, 0x39, 0x0b,
0x6e, 0xc0, 0x46, 0x49, 0xca, 0x22, 0x9a, 0xaf, 0x68, 0x91, 0x8d, 0x16, 0x61, 0x40, 0xe3, 0x7c,
0x14, 0xd1, 0x3c, 0x0d, 0x16, 0xd9, 0x30, 0x49, 0x59, 0xce, 0x50, 0x2f, 0x60, 0xc3, 0x8a, 0x33,
0x94, 0x9c, 0xfd, 0x9e, 0xcf, 0x7c, 0x26, 0x08, 0x23, 0xbe, 0x92, 0xdc, 0xfd, 0xbe, 0xcf, 0x98,
0x1f, 0xd2, 0x91, 0xd8, 0xcd, 0x8b, 0xf3, 0x51, 0x1e, 0x44, 0x34, 0xcb, 0x49, 0x94, 0x48, 0x82,
0xfb, 0x29, 0x58, 0xdf, 0x90, 0x39, 0x0d, 0x9f, 0x92, 0x20, 0x45, 0x08, 0x1a, 0x31, 0x89, 0xa8,
0x63, 0x0c, 0x8c, 0x23, 0x0b, 0x8b, 0x35, 0xea, 0x41, 0xf3, 0x39, 0x09, 0x0b, 0xea, 0xdc, 0x14,
0xa0, 0xdc, 0xb8, 0x07, 0xd0, 0x3c, 0x23, 0x85, 0xbf, 0xf6, 0x33, 0x8f, 0x31, 0xf4, 0xcf, 0xbf,
0x19, 0xd0, 0xbe, 0xc7, 0x8a, 0x38, 0xa7, 0x69, 0x3d, 0x03, 0xdd, 0x81, 0x2d, 0xfa, 0x23, 0x8d,
0x92, 0x90, 0xa4, 0x22, 0x73, 0xe7, 0xe3, 0xc3, 0x61, 0x5d, 0x5d, 0xc3, 0x53, 0xc5, 0xc2, 0x25,
0x1f, 0x8d, 0x61, 0x6f, 0x91, 0x52, 0x92, 0xd3, 0xa5, 0x57, 0x96, 0xe3, 0x98, 0x22, 0xc9, 0xfe,
0x50, 0x16, 0x3c, 0xd4, 0x05, 0x0f, 0xa7, 0x9a, 0x71, 0xd2, 0x78, 0xf1, 0x77, 0xdf, 0xc0, 0xb6,
0x0a, 0x2d, 0x71, 0xf7, 0x2e, 0x6c, 0x7d, 0x5b, 0x90, 0x38, 0x0f, 0x42, 0x8a, 0xf6, 0x61, 0xeb,
0x07, 0xb5, 0x56, 0x7a, 0xcb, 0xfd, 0x55, 0x27, 0xca, 0x52, 0xff, 0x32, 0xa0, 0x3d, 0x29, 0xa2,
0x88, 0xa4, 0x17, 0xe8, 0x5d, 0xd8, 0xce, 0x48, 0x94, 0x84, 0xd4, 0x5b, 0xf0, 0xe2, 0x45, 0x86,
0x06, 0xee, 0x48, 0x4c, 0xf8, 0x81, 0x0e, 0x00, 0x14, 0x25, 0x2b, 0x22, 0x95, 0xc9, 0x92, 0xc8,
0xa4, 0x88, 0xd0, 0x57, 0x6b, 0xe7, 0x9b, 0x03, 0x73, 0xb3, 0x2d, 0x5a, 0xb1, 0xa8, 0xea, 0xc6,
0x9a, 0xca, 0x5a, 0x73, 0x1a, 0xaf, 0x6d, 0x4e, 0x1f, 0xda, 0xb3, 0x38, 0xbf, 0x48, 0xe8, 0x72,
0xc3, 0x55, 0xff, 0xde, 0x04, 0xeb, 0x61, 0x90, 0xe5, 0xcc, 0x4f, 0x49, 0xf4, 0x7f, 0x1c, 0xf8,
0x10, 0xd0, 0x3a, 0xc5, 0x3b, 0x0f, 0x19, 0xc9, 0x85, 0x42, 0x03, 0xdb, 0x6b, 0xc4, 0x07, 0x1c,
0xff, 0x2f, 0xbf, 0xee, 0x40, 0x6b, 0x5e, 0x2c, 0xbe, 0xa7, 0xb9, 0x72, 0xeb, 0x9d, 0x7a, 0xb7,
0x4e, 0x04, 0x47, 0x79, 0xa5, 0x22, 0xea, 0x9d, 0xda, 0x7d, 0x5d, 0xa7, 0xd0, 0x2d, 0x68, 0x65,
0x8b, 0x15, 0x8d, 0x88, 0xd3, 0x1c, 0x18, 0x47, 0x7b, 0x58, 0xed, 0xd0, 0x7b, 0xb0, 0xf3, 0x13,
0x4d, 0x99, 0x97, 0xaf, 0x52, 0x9a, 0xad, 0x58, 0xb8, 0x74, 0x5a, 0xa2, 0x8a, 0x2e, 0x47, 0xa7,
0x1a, 0xe4, 0x85, 0x0a, 0x9a, 0xf4, 0xad, 0x2d, 0x7c, 0xb3, 0x38, 0x22, 0x5d, 0x3b, 0x02, 0xbb,
0xfa, 0x59, 0x79, 0xb6, 0x25, 0xf2, 0xec, 0x94, 0x24, 0xe9, 0xd8, 0x23, 0xe8, 0xc6, 0xd4, 0x27,
0x79, 0xf0, 0x9c, 0x7a, 0x59, 0x42, 0x62, 0xc7, 0x12, 0xce, 0x0c, 0x5e, 0xe5, 0xcc, 0x24, 0x21,
0xb1, 0x72, 0x67, 0x5b, 0x07, 0x73, 0x8c, 0x8b, 0x2f, 0x93, 0x2d, 0x69, 0x98, 0x13, 0x07, 0x06,
0xe6, 0x11, 0xc2, 0xe5, 0x11, 0xf7, 0x39, 0x78, 0x85, 0x26, 0x0b, 0xe8, 0x0c, 0x4c, 0x5e, 0xa3,
0x46, 0x65, 0x11, 0x8f, 0xa0, 0x9b, 0xb0, 0x2c, 0xa8, 0xa4, 0x6d, 0x5f, 0x4f, 0x9a, 0x0e, 0xd6,
0xd2, 0xca, 0x64, 0x52, 0x5a, 0x57, 0x4a, 0xd3, 0x68, 0x29, 0xad, 0xa4, 0x49, 0x69, 0x3b, 0x52,
0x9a, 0x46, 0x85, 0x34, 0xf7, 0x0f, 0x03, 0x5a, 0xf2, 0x40, 0xf4, 0x3e, 0xd8, 0x8b, 0x22, 0x2a,
0xc2, 0xf5, 0x72, 0xe4, 0x3b, 0xde, 0xad, 0x70, 0x59, 0xd0, 0x6d, 0xb8, 0xf5, 0x32, 0xf5, 0xca,
0x7b, 0xee, 0xbd, 0x14, 0x20, 0x6f, 0xa8, 0x0f, 0x9d, 0x22, 0x49, 0x68, 0xea, 0xcd, 0x59, 0x11,
0x2f, 0xd5, 0xa3, 0x06, 0x01, 0x9d, 0x70, 0xe4, 0x4a, 0x73, 0x34, 0xaf, 0xd7, 0x1c, 0xdd, 0xbb,
0x00, 0x95, 0x71, 0xfc, 0x51, 0xb2, 0xf3, 0xf3, 0x8c, 0xca, 0x0a, 0xf6, 0xb0, 0xda, 0x71, 0x3c,
0xa4, 0xb1, 0x9f, 0xaf, 0xc4, 0xe9, 0x5d, 0xac, 0x76, 0xee, 0x2f, 0x06, 0x6c, 0xe9, 0xa4, 0xe8,
0x0b, 0x68, 0x86, 0x7c, 0x36, 0x38, 0x86, 0xb8, 0xa6, 0x7e, 0xbd, 0x86, 0x72, 0x7c, 0xa8, 0x5b,
0x92, 0x31, 0xf5, 0xdd, 0x12, 0x7d, 0x0e, 0xd6, 0x35, 0x5a, 0x36, 0xae, 0xc8, 0xee, 0xcf, 0x26,
0xb4, 0xc6, 0x62, 0x0e, 0xbe, 0x99, 0xae, 0x8f, 0xa0, 0xe9, 0xf3, 0xc9, 0xa5, 0xa6, 0xce, 0xdb,
0xf5, 0xc1, 0x62, 0xb8, 0x61, 0xc9, 0x44, 0x9f, 0x41, 0x7b, 0x21, 0x87, 0x99, 0x92, 0x7c, 0x50,
0x1f, 0xa4, 0x26, 0x1e, 0xd6, 0x6c, 0x1e, 0x98, 0xc9, 0xd1, 0xa0, 0x3a, 0xf0, 0x86, 0x40, 0x35,
0x3f, 0xb0, 0x66, 0xf3, 0xc0, 0x42, 0x76, 0x5d, 0xd1, 0x4c, 0x36, 0x06, 0xaa, 0xd6, 0x8c, 0x35,
0x1b, 0x7d, 0x09, 0xd6, 0x4a, 0x37, 0x63, 0xd1, 0x44, 0x36, 0xda, 0x53, 0xf6, 0x6c, 0x5c, 0x45,
0xf0, 0xf6, 0x5d, 0x3a, 0xee, 0x45, 0x99, 0xe8, 0x54, 0x26, 0xee, 0x94, 0xd8, 0x38, 0x73, 0x7f,
0x35, 0x60, 0x5b, 0xde, 0xc3, 0x03, 0x12, 0x05, 0xe1, 0x45, 0xed, 0x47, 0x03, 0x82, 0xc6, 0x8a,
0x86, 0x89, 0xfa, 0x66, 0x10, 0x6b, 0x74, 0x1b, 0x1a, 0x5c, 0xa3, 0xb0, 0x70, 0x67, 0xd3, 0x7f,
0x5e, 0x66, 0x9e, 0x5e, 0x24, 0x14, 0x0b, 0x36, 0x6f, 0xf0, 0xf2, 0xeb, 0xc7, 0x69, 0xbc, 0xaa,
0xc1, 0xcb, 0x38, 0xdd, 0xe0, 0x65, 0xc4, 0x07, 0x73, 0x80, 0x2a, 0x1f, 0xea, 0x40, 0xfb, 0xde,
0x93, 0xd9, 0xe3, 0xe9, 0x29, 0xb6, 0x6f, 0x20, 0x0b, 0x9a, 0x67, 0xc7, 0xb3, 0xb3, 0x53, 0xdb,
0xe0, 0xf8, 0x64, 0x36, 0x1e, 0x1f, 0xe3, 0x67, 0xf6, 0x4d, 0xbe, 0x99, 0x3d, 0x9e, 0x3e, 0x7b,
0x7a, 0x7a, 0xdf, 0x36, 0x51, 0x17, 0xac, 0x87, 0x5f, 0x4f, 0xa6, 0x4f, 0xce, 0xf0, 0xf1, 0xd8,
0x6e, 0xa0, 0xb7, 0x60, 0x57, 0xc4, 0x78, 0x15, 0xd8, 0x3c, 0x71, 0x5f, 0x5c, 0x1e, 0x1a, 0x7f,
0x5e, 0x1e, 0x1a, 0xff, 0x5c, 0x1e, 0x1a, 0xdf, 0xf5, 0x02, 0xe6, 0x55, 0xe2, 0x3c, 0x29, 0x6e,
0xde, 0x12, 0x2f, 0xfb, 0x93, 0x7f, 0x03, 0x00, 0x00, 0xff, 0xff, 0x68, 0x3f, 0xd9, 0x07, 0xdd,
0x09, 0x00, 0x00,
0x14, 0xee, 0xd6, 0xbf, 0x7b, 0x1c, 0x27, 0x9b, 0xc1, 0xaa, 0x56, 0x81, 0xc4, 0x66, 0x25, 0xa4,
0x80, 0x90, 0x2d, 0xa0, 0x08, 0x54, 0x8a, 0x44, 0xd2, 0xa6, 0x2e, 0x2a, 0x6e, 0xcb, 0xd8, 0xbe,
0x28, 0x37, 0xab, 0xb1, 0x3d, 0x59, 0xaf, 0xd8, 0xdd, 0x59, 0xf6, 0xa7, 0x22, 0xdc, 0xf3, 0x0c,
0xbc, 0x00, 0x17, 0x3c, 0x05, 0x97, 0xa8, 0x97, 0x5c, 0x71, 0x89, 0x50, 0x9e, 0x04, 0xcd, 0xdf,
0xae, 0x53, 0xad, 0x03, 0x81, 0xbb, 0x99, 0xcf, 0xdf, 0x39, 0xf3, 0x9d, 0x6f, 0xc6, 0xe7, 0x2c,
0x38, 0x3e, 0x1b, 0xc5, 0x09, 0x0b, 0x69, 0xb6, 0xa6, 0x79, 0x3a, 0x5a, 0x06, 0x3e, 0x8d, 0xb2,
0x51, 0x48, 0xb3, 0xc4, 0x5f, 0xa6, 0xc3, 0x38, 0x61, 0x19, 0x43, 0x3d, 0x9f, 0x0d, 0x4b, 0xce,
0x50, 0x72, 0x0e, 0x7a, 0x1e, 0xf3, 0x98, 0x20, 0x8c, 0xf8, 0x4a, 0x72, 0x0f, 0xfa, 0x1e, 0x63,
0x5e, 0x40, 0x47, 0x62, 0xb7, 0xc8, 0xcf, 0x47, 0x99, 0x1f, 0xd2, 0x34, 0x23, 0x61, 0x2c, 0x09,
0xce, 0xc7, 0x60, 0x7e, 0x45, 0x16, 0x34, 0x78, 0x4e, 0xfc, 0x04, 0x21, 0xa8, 0x47, 0x24, 0xa4,
0xb6, 0x31, 0x30, 0x8e, 0x4d, 0x2c, 0xd6, 0xa8, 0x07, 0x8d, 0x97, 0x24, 0xc8, 0xa9, 0x7d, 0x5b,
0x80, 0x72, 0xe3, 0x1c, 0x42, 0x63, 0x4c, 0x72, 0x6f, 0xe3, 0x67, 0x1e, 0x63, 0xe8, 0x9f, 0x7f,
0x36, 0xa0, 0xf5, 0x80, 0xe5, 0x51, 0x46, 0x93, 0x6a, 0x06, 0xba, 0x07, 0x6d, 0xfa, 0x3d, 0x0d,
0xe3, 0x80, 0x24, 0x22, 0x73, 0xe7, 0xc3, 0xa3, 0x61, 0x55, 0x5d, 0xc3, 0x33, 0xc5, 0xc2, 0x05,
0x1f, 0x8d, 0x61, 0x7f, 0x99, 0x50, 0x92, 0xd1, 0x95, 0x5b, 0x94, 0x63, 0xd7, 0x44, 0x92, 0x83,
0xa1, 0x2c, 0x78, 0xa8, 0x0b, 0x1e, 0xce, 0x34, 0x03, 0x5b, 0x2a, 0xa8, 0x40, 0x9c, 0xfb, 0xd0,
0xfe, 0x3a, 0x27, 0x51, 0xe6, 0x07, 0x14, 0x1d, 0x40, 0xfb, 0x3b, 0xb5, 0x56, 0x4a, 0x8b, 0xfd,
0x55, 0x0f, 0x8a, 0x22, 0xff, 0x30, 0xa0, 0x35, 0xcd, 0xc3, 0x90, 0x24, 0x17, 0xe8, 0x6d, 0xd8,
0x49, 0x49, 0x18, 0x07, 0xd4, 0x5d, 0xf2, 0xb2, 0x45, 0x86, 0x3a, 0xee, 0x48, 0x4c, 0x38, 0x81,
0x0e, 0x01, 0x14, 0x25, 0xcd, 0x43, 0x95, 0xc9, 0x94, 0xc8, 0x34, 0x0f, 0xd1, 0x17, 0x1b, 0xe7,
0xd7, 0x06, 0xb5, 0xed, 0x86, 0x68, 0xc5, 0xa7, 0xf5, 0x57, 0x7f, 0xf6, 0x6f, 0x6d, 0xa8, 0xac,
0xb4, 0xa5, 0xfe, 0x1f, 0x6c, 0xe9, 0x43, 0x6b, 0x1e, 0x65, 0x17, 0x31, 0x5d, 0x6d, 0xb9, 0xde,
0x5f, 0x1b, 0x60, 0x3e, 0xf6, 0xd3, 0x8c, 0x79, 0x09, 0x09, 0xff, 0x4d, 0xed, 0xef, 0x03, 0xda,
0xa4, 0xb8, 0xe7, 0x01, 0x23, 0x99, 0xd0, 0x66, 0x60, 0x6b, 0x83, 0xf8, 0x88, 0xe3, 0xff, 0xe4,
0xd4, 0x3d, 0x68, 0x2e, 0xf2, 0xe5, 0xb7, 0x34, 0x53, 0x3e, 0xbd, 0x55, 0xed, 0xd3, 0xa9, 0xe0,
0x28, 0x97, 0x54, 0x44, 0xb5, 0x47, 0x7b, 0x37, 0xf7, 0x08, 0xdd, 0x81, 0x66, 0xba, 0x5c, 0xd3,
0x90, 0xd8, 0x8d, 0x81, 0x71, 0xbc, 0x8f, 0xd5, 0x0e, 0xbd, 0x03, 0xbb, 0x3f, 0xd0, 0x84, 0xb9,
0xd9, 0x3a, 0xa1, 0xe9, 0x9a, 0x05, 0x2b, 0xbb, 0x29, 0xf4, 0x77, 0x39, 0x3a, 0xd3, 0x20, 0x2f,
0x51, 0xd0, 0xa4, 0x63, 0x2d, 0xe1, 0x98, 0xc9, 0x11, 0xe9, 0xd7, 0x31, 0x58, 0xe5, 0xcf, 0xca,
0xad, 0xb6, 0xc8, 0xb3, 0x5b, 0x90, 0xa4, 0x57, 0x4f, 0xa0, 0x1b, 0x51, 0x8f, 0x64, 0xfe, 0x4b,
0xea, 0xa6, 0x31, 0x89, 0x6c, 0x53, 0x78, 0x32, 0xb8, 0xce, 0x93, 0x69, 0x4c, 0x22, 0xe5, 0xcb,
0x8e, 0x0e, 0xe6, 0x18, 0x17, 0x5f, 0x24, 0x5b, 0xd1, 0x20, 0x23, 0x36, 0x0c, 0x6a, 0xc7, 0x08,
0x17, 0x47, 0x3c, 0xe4, 0xe0, 0x15, 0x9a, 0x2c, 0xa0, 0x33, 0xa8, 0xf1, 0x1a, 0x35, 0x2a, 0x8b,
0x78, 0x02, 0xdd, 0x98, 0xa5, 0x7e, 0x29, 0x6d, 0xe7, 0x66, 0xd2, 0x74, 0xb0, 0x96, 0x56, 0x24,
0x93, 0xd2, 0xba, 0x52, 0x9a, 0x46, 0x0b, 0x69, 0x05, 0x4d, 0x4a, 0xdb, 0x95, 0xd2, 0x34, 0x2a,
0xa4, 0x39, 0xbf, 0x19, 0xd0, 0x94, 0x07, 0xa2, 0x77, 0xc1, 0x5a, 0xe6, 0x61, 0x1e, 0x6c, 0x96,
0x23, 0x5f, 0xf0, 0x5e, 0x89, 0xcb, 0x82, 0xee, 0xc2, 0x9d, 0xd7, 0xa9, 0x57, 0x5e, 0x72, 0xef,
0xb5, 0x00, 0x79, 0x43, 0x7d, 0xe8, 0xe4, 0x71, 0x4c, 0x13, 0x77, 0xc1, 0xf2, 0x68, 0xa5, 0x9e,
0x33, 0x08, 0xe8, 0x94, 0x23, 0x57, 0x5a, 0x61, 0xed, 0x66, 0xad, 0xd0, 0xb9, 0x0f, 0x50, 0x1a,
0xc7, 0x1f, 0x25, 0x3b, 0x3f, 0x4f, 0xa9, 0xac, 0x60, 0x1f, 0xab, 0x1d, 0xc7, 0x03, 0x1a, 0x79,
0xd9, 0x5a, 0x9c, 0xde, 0xc5, 0x6a, 0xe7, 0xfc, 0x64, 0x40, 0x5b, 0x27, 0x45, 0x9f, 0x41, 0x23,
0xe0, 0x93, 0xc0, 0x36, 0xc4, 0x35, 0xf5, 0xab, 0x35, 0x14, 0xc3, 0x42, 0xdd, 0x92, 0x8c, 0xa9,
0xee, 0x90, 0xe8, 0x53, 0x30, 0x6f, 0xd2, 0xa0, 0x4b, 0xb2, 0xf3, 0x63, 0x0d, 0x9a, 0x13, 0x31,
0xf5, 0xfe, 0x9f, 0xae, 0x0f, 0xa0, 0xe1, 0xf1, 0x39, 0xa5, 0x66, 0xcc, 0x9b, 0xd5, 0xc1, 0x62,
0x94, 0x61, 0xc9, 0x44, 0x9f, 0x40, 0x6b, 0x29, 0x47, 0x97, 0x92, 0x7c, 0x58, 0x1d, 0xa4, 0xe6,
0x1b, 0xd6, 0x6c, 0x1e, 0x98, 0xca, 0x71, 0xa0, 0xba, 0xee, 0x96, 0x40, 0x35, 0x33, 0xb0, 0x66,
0xf3, 0xc0, 0x5c, 0xf6, 0x5b, 0xd1, 0x4c, 0xb6, 0x06, 0xaa, 0xa6, 0x8c, 0x35, 0x1b, 0x7d, 0x0e,
0xe6, 0x5a, 0xb7, 0x61, 0xd1, 0x44, 0xb6, 0xda, 0x53, 0x74, 0x6b, 0x5c, 0x46, 0xf0, 0xc6, 0x5d,
0x38, 0xee, 0x86, 0xa9, 0xe8, 0x54, 0x35, 0xdc, 0x29, 0xb0, 0x49, 0xea, 0xfc, 0x62, 0xc0, 0x8e,
0xbc, 0x87, 0x47, 0x24, 0xf4, 0x83, 0x8b, 0xca, 0x4f, 0x04, 0x04, 0xf5, 0x35, 0x0d, 0x62, 0xf5,
0x85, 0x20, 0xd6, 0xe8, 0x2e, 0xd4, 0xb9, 0x46, 0x61, 0xe1, 0xee, 0xb6, 0xff, 0xbc, 0xcc, 0x3c,
0xbb, 0x88, 0x29, 0x16, 0x6c, 0xde, 0xda, 0xe5, 0xb7, 0x8e, 0x5d, 0xbf, 0xae, 0xb5, 0xcb, 0x38,
0xdd, 0xda, 0x65, 0xc4, 0x7b, 0x0b, 0x80, 0x32, 0x1f, 0xea, 0x40, 0xeb, 0xc1, 0xb3, 0xf9, 0xd3,
0xd9, 0x19, 0xb6, 0x6e, 0x21, 0x13, 0x1a, 0xe3, 0x93, 0xf9, 0xf8, 0xcc, 0x32, 0x38, 0x3e, 0x9d,
0x4f, 0x26, 0x27, 0xf8, 0x85, 0x75, 0x9b, 0x6f, 0xe6, 0x4f, 0x67, 0x2f, 0x9e, 0x9f, 0x3d, 0xb4,
0x6a, 0xa8, 0x0b, 0xe6, 0xe3, 0x2f, 0xa7, 0xb3, 0x67, 0x63, 0x7c, 0x32, 0xb1, 0xea, 0xe8, 0x0d,
0xd8, 0x13, 0x31, 0x6e, 0x09, 0x36, 0x4e, 0x9d, 0x57, 0x97, 0x47, 0xc6, 0xef, 0x97, 0x47, 0xc6,
0x5f, 0x97, 0x47, 0xc6, 0x37, 0x3d, 0x9f, 0xb9, 0xa5, 0x38, 0x57, 0x8a, 0x5b, 0x34, 0xc5, 0xcb,
0xfe, 0xe8, 0xef, 0x00, 0x00, 0x00, 0xff, 0xff, 0x0d, 0x2e, 0x66, 0xc1, 0xcb, 0x09, 0x00, 0x00,
}
func (m *LabelPair) Marshal() (dAtA []byte, err error) {

View file

@ -52,7 +52,7 @@ message Counter {
double value = 1;
Exemplar exemplar = 2;
google.protobuf.Timestamp created_timestamp = 3 [(gogoproto.nullable) = true];
google.protobuf.Timestamp created_timestamp = 3;
}
message Quantile {
@ -65,7 +65,7 @@ message Summary {
double sample_sum = 2;
repeated Quantile quantile = 3 [(gogoproto.nullable) = false];
google.protobuf.Timestamp created_timestamp = 4 [(gogoproto.nullable) = true];
google.protobuf.Timestamp created_timestamp = 4;
}
message Untyped {
@ -79,7 +79,7 @@ message Histogram {
// Buckets for the conventional histogram.
repeated Bucket bucket = 3 [(gogoproto.nullable) = false]; // Ordered in increasing order of upper_bound, +Inf bucket is optional.
google.protobuf.Timestamp created_timestamp = 15 [(gogoproto.nullable) = true];
google.protobuf.Timestamp created_timestamp = 15;
// Everything below here is for native histograms (also known as sparse histograms).
// Native histograms are an experimental feature without stability guarantees.

View file

@ -1225,10 +1225,11 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper)
enh.Out = result[:0] // Reuse result vector.
warnings.Merge(ws)
ev.currentSamples += len(result)
vecNumSamples := result.TotalSamples()
ev.currentSamples += vecNumSamples
// When we reset currentSamples to tempNumSamples during the next iteration of the loop it also
// needs to include the samples from the result here, as they're still in memory.
tempNumSamples += len(result)
tempNumSamples += vecNumSamples
ev.samplesStats.UpdatePeak(ev.currentSamples)
if ev.currentSamples > ev.maxSamples {
@ -1324,12 +1325,10 @@ func (ev *evaluator) evalSubquery(subq *parser.SubqueryExpr) (*parser.MatrixSele
Range: subq.Range,
VectorSelector: vs,
}
totalSamples := 0
for _, s := range mat {
totalSamples += len(s.Floats) + len(s.Histograms)
vs.Series = append(vs.Series, NewStorageSeries(s))
}
return ms, totalSamples, ws
return ms, mat.TotalSamples(), ws
}
// eval evaluates the given expression as the given AST expression node requires.
@ -1471,7 +1470,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio
it := storage.NewBuffer(selRange)
var chkIter chunkenc.Iterator
for i, s := range selVS.Series {
ev.currentSamples -= len(floats) + len(histograms)
ev.currentSamples -= len(floats) + totalHPointSize(histograms)
if floats != nil {
floats = floats[:0]
}
@ -1515,7 +1514,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio
// Make the function call.
outVec, annos := call(inArgs, e.Args, enh)
warnings.Merge(annos)
ev.samplesStats.IncrementSamplesAtStep(step, int64(len(floats)+len(histograms)))
ev.samplesStats.IncrementSamplesAtStep(step, int64(len(floats)+totalHPointSize(histograms)))
enh.Out = outVec[:0]
if len(outVec) > 0 {
@ -1534,10 +1533,11 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio
// Only buffer stepRange milliseconds from the second step on.
it.ReduceDelta(stepRange)
}
if len(ss.Floats)+len(ss.Histograms) > 0 {
if ev.currentSamples+len(ss.Floats)+len(ss.Histograms) <= ev.maxSamples {
histSamples := totalHPointSize(ss.Histograms)
if len(ss.Floats)+histSamples > 0 {
if ev.currentSamples+len(ss.Floats)+histSamples <= ev.maxSamples {
mat = append(mat, ss)
ev.currentSamples += len(ss.Floats) + len(ss.Histograms)
ev.currentSamples += len(ss.Floats) + histSamples
} else {
ev.error(ErrTooManySamples(env))
}
@ -1546,7 +1546,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio
}
ev.samplesStats.UpdatePeak(ev.currentSamples)
ev.currentSamples -= len(floats) + len(histograms)
ev.currentSamples -= len(floats) + totalHPointSize(histograms)
putFPointSlice(floats)
putHPointSlice(histograms)
@ -1693,14 +1693,18 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio
ss.Floats = getFPointSlice(numSteps)
}
ss.Floats = append(ss.Floats, FPoint{F: f, T: ts})
ev.currentSamples++
ev.samplesStats.IncrementSamplesAtStep(step, 1)
} else {
if ss.Histograms == nil {
ss.Histograms = getHPointSlice(numSteps)
}
ss.Histograms = append(ss.Histograms, HPoint{H: h, T: ts})
point := HPoint{H: h, T: ts}
ss.Histograms = append(ss.Histograms, point)
histSize := point.size()
ev.currentSamples += histSize
ev.samplesStats.IncrementSamplesAtStep(step, int64(histSize))
}
ev.samplesStats.IncrementSamplesAtStep(step, 1)
ev.currentSamples++
} else {
ev.error(ErrTooManySamples(env))
}
@ -1808,13 +1812,15 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio
T: ts,
F: mat[i].Floats[0].F,
})
ev.currentSamples++
} else {
mat[i].Histograms = append(mat[i].Histograms, HPoint{
point := HPoint{
T: ts,
H: mat[i].Histograms[0].H,
})
}
mat[i].Histograms = append(mat[i].Histograms, point)
ev.currentSamples += point.size()
}
ev.currentSamples++
if ev.currentSamples > ev.maxSamples {
ev.error(ErrTooManySamples(env))
}
@ -1858,9 +1864,14 @@ func (ev *evaluator) rangeEvalTimestampFunctionOverVectorSelector(vs *parser.Vec
F: f,
H: h,
})
histSize := 0
if h != nil {
histSize := h.Size() / 16 // 16 bytes per sample.
ev.currentSamples += histSize
}
ev.currentSamples++
ev.samplesStats.IncrementSamplesAtTimestamp(enh.Ts, 1)
ev.samplesStats.IncrementSamplesAtTimestamp(enh.Ts, int64(1+histSize))
if ev.currentSamples > ev.maxSamples {
ev.error(ErrTooManySamples(env))
}
@ -1982,10 +1993,10 @@ func (ev *evaluator) matrixSelector(node *parser.MatrixSelector) (Matrix, annota
}
ss.Floats, ss.Histograms = ev.matrixIterSlice(it, mint, maxt, nil, nil)
totalLen := int64(len(ss.Floats)) + int64(len(ss.Histograms))
ev.samplesStats.IncrementSamplesAtTimestamp(ev.startTimestamp, totalLen)
totalSize := int64(len(ss.Floats)) + int64(totalHPointSize(ss.Histograms))
ev.samplesStats.IncrementSamplesAtTimestamp(ev.startTimestamp, totalSize)
if totalLen > 0 {
if totalSize > 0 {
matrix = append(matrix, ss)
} else {
putFPointSlice(ss.Floats)
@ -2041,13 +2052,13 @@ func (ev *evaluator) matrixIterSlice(
var drop int
for drop = 0; histograms[drop].T < mint; drop++ { // nolint:revive
}
ev.currentSamples -= drop
copy(histograms, histograms[drop:])
histograms = histograms[:len(histograms)-drop]
ev.currentSamples -= totalHPointSize(histograms)
// Only append points with timestamps after the last timestamp we have.
mintHistograms = histograms[len(histograms)-1].T + 1
} else {
ev.currentSamples -= len(histograms)
ev.currentSamples -= totalHPointSize(histograms)
if histograms != nil {
histograms = histograms[:0]
}
@ -2076,11 +2087,12 @@ loop:
if ev.currentSamples >= ev.maxSamples {
ev.error(ErrTooManySamples(env))
}
ev.currentSamples++
point := HPoint{T: t, H: h}
if histograms == nil {
histograms = getHPointSlice(16)
}
histograms = append(histograms, HPoint{T: t, H: h})
histograms = append(histograms, point)
ev.currentSamples += point.size()
}
case chunkenc.ValFloat:
t, f := buf.At()
@ -2111,8 +2123,9 @@ loop:
if histograms == nil {
histograms = getHPointSlice(16)
}
histograms = append(histograms, HPoint{T: t, H: h})
ev.currentSamples++
point := HPoint{T: t, H: h}
histograms = append(histograms, point)
ev.currentSamples += point.size()
}
case chunkenc.ValFloat:
t, f := it.At()

View file

@ -168,6 +168,23 @@ func (p HPoint) MarshalJSON() ([]byte, error) {
return json.Marshal([...]interface{}{float64(p.T) / 1000, h})
}
// size reports how many float-sample equivalents this HPoint occupies.
// The byte count is the histogram's own size (p.H.Size()) plus 8 bytes for
// the timestamp p.T; dividing by 16, the size of one float sample, turns it
// into a sample count. The integer division truncates.
func (p HPoint) size() int {
	const (
		timestampBytes = 8
		fPointBytes    = 16
	)
	histBytes := p.H.Size()
	return (histBytes + timestampBytes) / fPointBytes
}
// totalHPointSize adds up HPoint.size over the given histogram points, i.e.
// their combined weight expressed in float-sample equivalents.
// A nil or empty slice yields 0.
func totalHPointSize(histograms []HPoint) int {
	sum := 0
	for i := range histograms {
		sum += histograms[i].size()
	}
	return sum
}
// Sample is a single sample belonging to a metric. It represents either a float
// sample or a histogram sample. If H is nil, it is a float sample. Otherwise,
// it is a histogram sample.
@ -226,6 +243,21 @@ func (vec Vector) String() string {
return strings.Join(entries, "\n")
}
// TotalSamples returns the weighted number of samples held by the vector.
// Every sample counts as 1; a histogram sample additionally counts
// H.Size()/16 (truncated), i.e. its byte size expressed in 16-byte float
// samples, so histograms weigh more than floats.
func (vec Vector) TotalSamples() int {
	total := len(vec) // Each sample, float or histogram, counts once.
	for i := range vec {
		if hist := vec[i].H; hist != nil {
			total += hist.Size() / 16
		}
	}
	return total
}
// ContainsSameLabelset checks if a vector has samples with the same labelset
// Such a behavior is semantically undefined
// https://github.com/prometheus/prometheus/issues/4562
@ -264,10 +296,13 @@ func (m Matrix) String() string {
}
// TotalSamples returns the total number of samples in the series within a matrix.
// Float samples have a weight of 1 in this number, while histogram samples have a higher
// weight according to their size compared with the size of a float sample.
// See HPoint.size for details.
func (m Matrix) TotalSamples() int {
numSamples := 0
for _, series := range m {
numSamples += len(series.Floats) + len(series.Histograms)
numSamples += len(series.Floats) + totalHPointSize(series.Histograms)
}
return numSamples
}

923
rules/group.go Normal file
View file

@ -0,0 +1,923 @@
// Copyright 2013 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package rules
import (
"context"
"errors"
"math"
"sort"
"strings"
"sync"
"time"
"golang.org/x/exp/slices"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/timestamp"
"github.com/prometheus/prometheus/model/value"
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/tsdb/chunkenc"
)
// Group is a set of rules that have a logical relation.
// The rules of a group are evaluated one after another by Eval, and the
// group's run loop triggers an evaluation once per interval.
type Group struct {
name string
file string
interval time.Duration
// evaluationDelay, when non-nil, takes precedence over
// opts.DefaultEvaluationDelay; see EvaluationDelay.
evaluationDelay *time.Duration
// limit is passed to every rule's Eval call.
limit int
rules []Rule
sourceTenants []string
// seriesInPreviousEval records, per rule, the series that the rule's latest
// successfully committed evaluation returned.
seriesInPreviousEval []map[string]labels.Labels // One per Rule.
// staleSeries holds series that still need a staleness marker;
// cleanupStaleSeries writes the markers and clears the list.
staleSeries []labels.Labels
opts *ManagerOptions
// mtx is taken by the getters and setters of evaluationTime, lastEvaluation
// and lastEvalTimestamp, and by AlertingRules and HasAlertingRules.
mtx sync.Mutex
evaluationTime time.Duration
lastEvaluation time.Time // Wall-clock time of most recent evaluation.
lastEvalTimestamp time.Time // Time slot used for most recent evaluation.
// shouldRestore makes run call RestoreForState once, after a second evaluation.
shouldRestore bool
// markStale makes run, on exit, queue the group's series for staleness markers.
markStale bool
// done is closed by stop to make run (and an in-progress Eval) return.
done chan struct{}
// terminated is closed when run returns.
terminated chan struct{}
// managerDone comes from GroupOptions.done; run's stale-series goroutine
// gives up waiting as soon as a receive on it succeeds.
managerDone chan struct{}
logger log.Logger
metrics *Metrics
// Rule group evaluation iteration function,
// defaults to DefaultEvalIterationFunc.
evalIterationFunc GroupEvalIterationFunc
// alignEvaluationTimeOnInterval, when true, makes EvalTimestamp skip the
// per-group hash offset so slots fall exactly on multiples of the interval.
alignEvaluationTimeOnInterval bool
}
// GroupEvalIterationFunc is used to implement and extend rule group
// evaluation iteration logic. It is configured in Group.evalIterationFunc,
// and invoked by the group's run loop at each group evaluation interval to
// evaluate the rules in the group at that point in time.
// It receives the run loop's context, the group itself and the timestamp of
// the evaluation slot being processed.
// DefaultEvalIterationFunc is the default implementation.
type GroupEvalIterationFunc func(ctx context.Context, g *Group, evalTimestamp time.Time)
// GroupOptions bundles the arguments of NewGroup.
// Opts must be non-nil, as NewGroup reads its Metrics, Registerer and Logger.
type GroupOptions struct {
Name, File string
Interval time.Duration
Limit int
Rules []Rule
SourceTenants []string
ShouldRestore bool
Opts *ManagerOptions
// EvaluationDelay is the group-specific evaluation delay; nil means the
// group falls back to Opts.DefaultEvaluationDelay (see Group.EvaluationDelay).
EvaluationDelay *time.Duration
// done becomes the group's managerDone channel.
done chan struct{}
// EvalIterationFunc replaces DefaultEvalIterationFunc when non-nil.
EvalIterationFunc GroupEvalIterationFunc
AlignEvaluationTimeOnInterval bool
}
// NewGroup builds a Group from the given options.
//
// o.Opts must be non-nil: its Metrics (or, when that is nil, a fresh set
// created by NewGroupMetrics on o.Opts.Registerer) and its Logger are used
// here. A nil o.EvalIterationFunc is replaced by DefaultEvalIterationFunc.
// The returned group has its own done and terminated channels; it is not
// started.
func NewGroup(o GroupOptions) *Group {
	groupMetrics := o.Opts.Metrics
	if groupMetrics == nil {
		groupMetrics = NewGroupMetrics(o.Opts.Registerer)
	}

	// Create this group's child of every per-group metric up front, and set
	// the two gauges whose value is already known.
	groupKey := GroupKey(o.File, o.Name)
	for _, counter := range []*prometheus.CounterVec{
		groupMetrics.IterationsMissed,
		groupMetrics.IterationsScheduled,
		groupMetrics.EvalTotal,
		groupMetrics.EvalFailures,
	} {
		counter.WithLabelValues(groupKey)
	}
	for _, gauge := range []*prometheus.GaugeVec{
		groupMetrics.GroupLastEvalTime,
		groupMetrics.GroupLastDuration,
		groupMetrics.GroupSamples,
	} {
		gauge.WithLabelValues(groupKey)
	}
	groupMetrics.GroupRules.WithLabelValues(groupKey).Set(float64(len(o.Rules)))
	groupMetrics.GroupInterval.WithLabelValues(groupKey).Set(o.Interval.Seconds())

	g := &Group{
		name:                          o.Name,
		file:                          o.File,
		rules:                         o.Rules,
		sourceTenants:                 o.SourceTenants,
		interval:                      o.Interval,
		evaluationDelay:               o.EvaluationDelay,
		alignEvaluationTimeOnInterval: o.AlignEvaluationTimeOnInterval,
		limit:                         o.Limit,
		shouldRestore:                 o.ShouldRestore,
		opts:                          o.Opts,
		metrics:                       groupMetrics,
		logger:                        log.With(o.Opts.Logger, "file", o.File, "group", o.Name),
		seriesInPreviousEval:          make([]map[string]labels.Labels, len(o.Rules)),
		done:                          make(chan struct{}),
		terminated:                    make(chan struct{}),
		managerDone:                   o.done,
		evalIterationFunc:             o.EvalIterationFunc,
	}
	if g.evalIterationFunc == nil {
		g.evalIterationFunc = DefaultEvalIterationFunc
	}
	return g
}
// Name returns the group's name.
func (g *Group) Name() string {
	return g.name
}

// File returns the file the group belongs to.
func (g *Group) File() string {
	return g.file
}

// Rules returns the group's rules. The group's own slice is returned, not a copy.
func (g *Group) Rules() []Rule {
	return g.rules
}

// Queryable returns the queryable configured in the group's manager options.
func (g *Group) Queryable() storage.Queryable {
	return g.opts.Queryable
}

// Context returns the context configured in the group's manager options.
func (g *Group) Context() context.Context {
	return g.opts.Context
}

// Interval returns the group's evaluation interval.
func (g *Group) Interval() time.Duration {
	return g.interval
}

// Limit returns the group's limit, which is passed to each rule evaluation.
func (g *Group) Limit() int {
	return g.limit
}

// SourceTenants returns the source tenants for the group.
// If it's empty or nil, then the owning user/tenant is considered to be the source tenant.
func (g *Group) SourceTenants() []string {
	return g.sourceTenants
}

// Logger returns the group's logger, which carries the group's file and name.
func (g *Group) Logger() log.Logger {
	return g.logger
}
// run is the group's evaluation loop. It waits until the group's next
// evaluation slot, evaluates once, optionally restores the alerts' 'for' state
// after a second evaluation, and then evaluates on every tick of a ticker
// with period g.interval until g.done is closed.
// g.terminated is closed when run returns.
func (g *Group) run(ctx context.Context) {
defer close(g.terminated)
// Wait an initial amount to have consistently slotted intervals.
evalTimestamp := g.EvalTimestamp(time.Now().UnixNano()).Add(g.interval)
select {
case <-time.After(time.Until(evalTimestamp)):
case <-g.done:
// Stopped before the first slot: nothing was evaluated and the
// stale-series handling below has not been registered yet.
return
}
// Attach the group's file and name to the context as the query origin.
ctx = promql.NewOriginContext(ctx, map[string]interface{}{
"ruleGroup": map[string]string{
"file": g.File(),
"name": g.Name(),
},
})
// The assumption here is that since the ticker was started after having
// waited for `evalTimestamp` to pass, the ticks will trigger soon
// after each `evalTimestamp + N * g.interval` occurrence.
tick := time.NewTicker(g.interval)
defer tick.Stop()
// Deferred last, so this runs first when run returns. It only does work when
// markStale is set: the series seen in the previous evaluation are queued as
// stale and, unless managerDone fires first, written out two intervals later.
defer func() {
if !g.markStale {
return
}
go func(now time.Time) {
for _, rule := range g.seriesInPreviousEval {
for _, r := range rule {
g.staleSeries = append(g.staleSeries, r)
}
}
// That can be garbage collected at this point.
g.seriesInPreviousEval = nil
// Wait for 2 intervals to give the opportunity to renamed rules
// to insert new series in the tsdb. At this point if there is a
// renamed rule, it should already be started.
select {
case <-g.managerDone:
case <-time.After(2 * g.interval):
g.cleanupStaleSeries(ctx, now)
}
}(time.Now())
}()
g.evalIterationFunc(ctx, g, evalTimestamp)
if g.shouldRestore {
// If we have to restore, we wait for another Eval to finish.
// The reason behind this is, during first eval (or before it)
// we might not have enough data scraped, and recording rules would not
// have updated the latest values, on which some alerts might depend.
select {
case <-g.done:
return
case <-tick.C:
// missed is the number of whole intervals that elapsed since the last
// evaluation slot, not counting the one that is about to be evaluated.
missed := (time.Since(evalTimestamp) / g.interval) - 1
if missed > 0 {
g.metrics.IterationsMissed.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed))
g.metrics.IterationsScheduled.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed))
}
// Skip over the missed slots so evalTimestamp stays on the slot grid.
evalTimestamp = evalTimestamp.Add((missed + 1) * g.interval)
g.evalIterationFunc(ctx, g, evalTimestamp)
}
g.RestoreForState(time.Now())
g.shouldRestore = false
}
for {
// The outer non-blocking select checks g.done first, so a closed g.done
// wins even when a tick is ready at the same time.
select {
case <-g.done:
return
default:
select {
case <-g.done:
return
case <-tick.C:
// Same missed-slot accounting as in the restore branch above.
missed := (time.Since(evalTimestamp) / g.interval) - 1
if missed > 0 {
g.metrics.IterationsMissed.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed))
g.metrics.IterationsScheduled.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed))
}
evalTimestamp = evalTimestamp.Add((missed + 1) * g.interval)
g.evalIterationFunc(ctx, g, evalTimestamp)
}
}
}
}
// stop asks the run loop to exit by closing g.done and then blocks until run
// has returned, which is signalled by g.terminated being closed.
// NOTE(review): g.done is closed unconditionally, so a second call would
// panic, and the wait never ends if run was not started — confirm callers
// guarantee both.
func (g *Group) stop() {
close(g.done)
<-g.terminated
}
// hash returns the hash of a label set built from the group's name and file.
// EvalTimestamp uses it to derive the group's evaluation offset.
func (g *Group) hash() uint64 {
	nameLabel := labels.Label{Name: "name", Value: g.name}
	fileLabel := labels.Label{Name: "file", Value: g.file}
	return labels.New(nameLabel, fileLabel).Hash()
}
// AlertingRules returns the group's alerting rules, skipping every other kind
// of rule. The result is ordered by descending state value and, for rules in
// the same state, by ascending name. It is nil when the group has no alerting
// rules. The group mutex is held for the duration of the call.
func (g *Group) AlertingRules() []*AlertingRule {
	g.mtx.Lock()
	defer g.mtx.Unlock()

	var alerting []*AlertingRule
	for _, r := range g.rules {
		ar, isAlerting := r.(*AlertingRule)
		if !isAlerting {
			continue
		}
		alerting = append(alerting, ar)
	}

	slices.SortFunc(alerting, func(x, y *AlertingRule) int {
		if x.State() != y.State() {
			return int(y.State() - x.State())
		}
		return strings.Compare(x.Name(), y.Name())
	})
	return alerting
}
// HasAlertingRules reports whether at least one of the group's rules is an
// *AlertingRule. The group mutex is held while the rules are inspected.
func (g *Group) HasAlertingRules() bool {
	g.mtx.Lock()
	defer g.mtx.Unlock()

	for i := range g.rules {
		switch g.rules[i].(type) {
		case *AlertingRule:
			return true
		}
	}
	return false
}
// GetEvaluationTime returns how long the last recorded evaluation of the rule
// group took, as stored by setEvaluationTime.
func (g *Group) GetEvaluationTime() time.Duration {
	g.mtx.Lock()
	elapsed := g.evaluationTime
	g.mtx.Unlock()
	return elapsed
}
// setEvaluationTime records how long the last evaluation took. The duration
// is exported in seconds through the GroupLastDuration gauge and stored on
// the group under its mutex.
func (g *Group) setEvaluationTime(dur time.Duration) {
	groupKey := GroupKey(g.file, g.name)
	g.metrics.GroupLastDuration.WithLabelValues(groupKey).Set(dur.Seconds())

	g.mtx.Lock()
	g.evaluationTime = dur
	g.mtx.Unlock()
}
// GetLastEvaluation returns the wall-clock time at which the rule group was
// last evaluated, as stored by setLastEvaluation.
func (g *Group) GetLastEvaluation() time.Time {
	g.mtx.Lock()
	last := g.lastEvaluation
	g.mtx.Unlock()
	return last
}
// setLastEvaluation records ts as the time the rule group was last evaluated.
// The time is exported as seconds since the Unix epoch through the
// GroupLastEvalTime gauge and stored in lastEvaluation under the group mutex.
func (g *Group) setLastEvaluation(ts time.Time) {
	unixSeconds := float64(ts.UnixNano()) / 1e9
	g.metrics.GroupLastEvalTime.WithLabelValues(GroupKey(g.file, g.name)).Set(unixSeconds)

	g.mtx.Lock()
	g.lastEvaluation = ts
	g.mtx.Unlock()
}
// GetLastEvalTimestamp returns the time slot used for the most recent
// evaluation, as stored by setLastEvalTimestamp.
func (g *Group) GetLastEvalTimestamp() time.Time {
	g.mtx.Lock()
	slot := g.lastEvalTimestamp
	g.mtx.Unlock()
	return slot
}
// setLastEvalTimestamp stores ts as the time slot of the most recent
// evaluation, under the group mutex.
func (g *Group) setLastEvalTimestamp(ts time.Time) {
	g.mtx.Lock()
	g.lastEvalTimestamp = ts
	g.mtx.Unlock()
}
// EvalTimestamp returns the immediately preceding consistently slotted
// evaluation time for startTime, which is given in nanoseconds since the Unix
// epoch. The result is in UTC.
//
// Unless alignEvaluationTimeOnInterval is set, every group is shifted away
// from the exact multiples of its interval by an offset derived from the
// group's hash, which spreads the evaluation times of different groups.
func (g *Group) EvalTimestamp(startTime int64) time.Time {
	interval := int64(g.interval)

	var offset int64
	if !g.alignEvaluationTimeOnInterval {
		offset = int64(g.hash() % uint64(g.interval))
	}

	// `% interval` can only align to exact multiples of the interval, so the
	// offset has to be added after aligning. Adding it to the aligned value of
	// startTime itself could land in the future; aligning startTime-offset
	// instead guarantees that the final result is not after startTime.
	shifted := startTime - offset
	slotStart := shifted - shifted%interval

	return time.Unix(0, slotStart+offset).UTC()
}
// nameAndLabels returns the rule's name immediately followed by the string
// form of its labels. CopyState uses the result as the identity of a rule.
func nameAndLabels(rule Rule) string {
	name := rule.Name()
	lbls := rule.Labels().String()
	return name + lbls
}
// CopyState copies the alerting rule and staleness related state from the given group.
//
// Rules are matched based on their name and labels. If there are duplicates, the
// first is matched with the first, second with the second etc.
//
// For every matched pair the series of the previous evaluation are carried
// over, and when both rules are alerting rules the active alerts are copied
// too. Series belonging to identities that still have unmatched rules in from
// are appended to the stale series taken over from from.
func (g *Group) CopyState(from *Group) {
	g.evaluationTime = from.evaluationTime
	g.lastEvaluation = from.lastEvaluation

	// unclaimed maps a rule identity to the indexes in from.rules that have
	// not been matched yet, in their original order.
	unclaimed := make(map[string][]int, len(from.rules))
	for idx, old := range from.rules {
		key := nameAndLabels(old)
		unclaimed[key] = append(unclaimed[key], idx)
	}

	for i, rule := range g.rules {
		key := nameAndLabels(rule)
		candidates := unclaimed[key]
		if len(candidates) == 0 {
			continue
		}
		// Claim the earliest remaining rule with the same identity.
		src := candidates[0]
		unclaimed[key] = candidates[1:]
		g.seriesInPreviousEval[i] = from.seriesInPreviousEval[src]

		target, isAlerting := rule.(*AlertingRule)
		if !isAlerting {
			continue
		}
		source, wasAlerting := from.rules[src].(*AlertingRule)
		if !wasAlerting {
			continue
		}
		for fp, a := range source.active {
			target.active[fp] = a
		}
	}

	// Handle deleted and unmatched duplicate rules.
	g.staleSeries = from.staleSeries
	for idx, old := range from.rules {
		if len(unclaimed[nameAndLabels(old)]) == 0 {
			continue
		}
		for _, series := range from.seriesInPreviousEval[idx] {
			g.staleSeries = append(g.staleSeries, series)
		}
	}
}
// Eval runs a single evaluation cycle in which all rules are evaluated sequentially.
//
// Each rule is evaluated at ts inside its own tracing span. A rule whose
// evaluation fails is marked unhealthy and skipped. For a successful rule the
// returned samples are appended and committed through g.opts.Appendable,
// alerting rules send their alerts, and every series the rule returned in its
// previous evaluation but not in this one gets a staleness marker at ts minus
// the evaluation delay.
//
// If g.done is closed the cycle is abandoned before the next rule: the
// function returns immediately, without updating the GroupSamples gauge and
// without writing the pending stale series.
func (g *Group) Eval(ctx context.Context, ts time.Time) {
	var samplesTotal float64
	evaluationDelay := g.EvaluationDelay()
	for i, rule := range g.rules {
		select {
		case <-g.done:
			return
		default:
		}

		// The closure scopes the deferred span/commit handling to one rule.
		func(i int, rule Rule) {
			logger := log.WithPrefix(g.logger, "name", rule.Name(), "index", i)
			ctx, sp := otel.Tracer("").Start(ctx, "rule")
			sp.SetAttributes(attribute.String("name", rule.Name()))
			defer func(t time.Time) {
				sp.End()

				since := time.Since(t)
				g.metrics.EvalDuration.Observe(since.Seconds())
				rule.SetEvaluationDuration(since)
				rule.SetEvaluationTimestamp(t)
			}(time.Now())

			if sp.SpanContext().IsSampled() && sp.SpanContext().HasTraceID() {
				// Extend the per-rule logger rather than g.logger, so that the
				// "name" and "index" fields stay on the log lines of traced rules.
				logger = log.WithPrefix(logger, "traceID", sp.SpanContext().TraceID())
			}

			g.metrics.EvalTotal.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()

			vector, err := rule.Eval(ctx, evaluationDelay, ts, g.opts.QueryFunc, g.opts.ExternalURL, g.Limit())
			if err != nil {
				rule.SetHealth(HealthBad)
				rule.SetLastError(err)
				sp.SetStatus(codes.Error, err.Error())
				g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()

				// Canceled queries are intentional termination of queries. This normally
				// happens on shutdown and thus we skip logging of any errors here.
				var eqc promql.ErrQueryCanceled
				if !errors.As(err, &eqc) {
					level.Warn(logger).Log("msg", "Evaluating rule failed", "rule", rule, "err", err)
				}
				return
			}
			rule.SetHealth(HealthGood)
			rule.SetLastError(nil)
			samplesTotal += float64(len(vector))

			if ar, ok := rule.(*AlertingRule); ok {
				ar.sendAlerts(ctx, ts, g.opts.ResendDelay, g.interval, g.opts.NotifyFunc)
			}
			var (
				numOutOfOrder = 0
				numTooOld     = 0
				numDuplicates = 0
			)

			app := g.opts.Appendable.Appender(ctx)
			seriesReturned := make(map[string]labels.Labels, len(g.seriesInPreviousEval[i]))
			// Commit once all appends below are done. Only a successful commit
			// replaces the record of series seen for this rule.
			defer func() {
				if err := app.Commit(); err != nil {
					rule.SetHealth(HealthBad)
					rule.SetLastError(err)
					sp.SetStatus(codes.Error, err.Error())
					g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()

					level.Warn(logger).Log("msg", "Rule sample appending failed", "err", err)
					return
				}
				g.seriesInPreviousEval[i] = seriesReturned
			}()

			for _, s := range vector {
				if s.H != nil {
					_, err = app.AppendHistogram(0, s.Metric, s.T, nil, s.H)
				} else {
					_, err = app.Append(0, s.Metric, s.T, s.F)
				}

				if err != nil {
					rule.SetHealth(HealthBad)
					rule.SetLastError(err)
					sp.SetStatus(codes.Error, err.Error())
					unwrappedErr := errors.Unwrap(err)
					if unwrappedErr == nil {
						unwrappedErr = err
					}
					// Count the well-known rejection reasons for the summary
					// logs below; every discarded sample is logged either way.
					switch {
					case errors.Is(unwrappedErr, storage.ErrOutOfOrderSample):
						numOutOfOrder++
					case errors.Is(unwrappedErr, storage.ErrTooOldSample):
						numTooOld++
					case errors.Is(unwrappedErr, storage.ErrDuplicateSampleForTimestamp):
						numDuplicates++
					}
					level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
				} else {
					buf := [1024]byte{}
					seriesReturned[string(s.Metric.Bytes(buf[:]))] = s.Metric
				}
			}
			if numOutOfOrder > 0 {
				level.Warn(logger).Log("msg", "Error on ingesting out-of-order result from rule evaluation", "numDropped", numOutOfOrder)
			}
			if numTooOld > 0 {
				level.Warn(logger).Log("msg", "Error on ingesting too old result from rule evaluation", "numDropped", numTooOld)
			}
			if numDuplicates > 0 {
				level.Warn(logger).Log("msg", "Error on ingesting results from rule evaluation with different value but same timestamp", "numDropped", numDuplicates)
			}

			for metric, lset := range g.seriesInPreviousEval[i] {
				if _, ok := seriesReturned[metric]; !ok {
					// Series no longer exposed, mark it stale.
					_, err = app.Append(0, lset, timestamp.FromTime(ts.Add(-evaluationDelay)), math.Float64frombits(value.StaleNaN))
					unwrappedErr := errors.Unwrap(err)
					if unwrappedErr == nil {
						unwrappedErr = err
					}
					switch {
					case unwrappedErr == nil:
					case errors.Is(unwrappedErr, storage.ErrOutOfOrderSample),
						errors.Is(unwrappedErr, storage.ErrTooOldSample),
						errors.Is(unwrappedErr, storage.ErrDuplicateSampleForTimestamp):
						// Do not count these in logging, as this is expected if series
						// is exposed from a different rule.
					default:
						level.Warn(logger).Log("msg", "Adding stale sample failed", "sample", lset.String(), "err", err)
					}
				}
			}
		}(i, rule)
	}
	if g.metrics != nil {
		g.metrics.GroupSamples.WithLabelValues(GroupKey(g.File(), g.Name())).Set(samplesTotal)
	}
	g.cleanupStaleSeries(ctx, ts)
}
// EvaluationDelay returns the evaluation delay that applies to the group:
// the group's own setting when it has one, otherwise the value produced by
// opts.DefaultEvaluationDelay when that function is set, otherwise zero.
func (g *Group) EvaluationDelay() time.Duration {
	switch {
	case g.evaluationDelay != nil:
		return *g.evaluationDelay
	case g.opts.DefaultEvaluationDelay != nil:
		return g.opts.DefaultEvaluationDelay()
	default:
		return 0
	}
}
// cleanupStaleSeries writes a staleness marker, timestamped at ts minus the
// evaluation delay, for every series in g.staleSeries and commits them in one
// appender. The list is cleared only when the commit succeeds, so the markers
// are retried on the next call otherwise. It does nothing when the list is
// empty.
func (g *Group) cleanupStaleSeries(ctx context.Context, ts time.Time) {
	if len(g.staleSeries) == 0 {
		return
	}

	app := g.opts.Appendable.Appender(ctx)
	evaluationDelay := g.EvaluationDelay()
	markerTime := timestamp.FromTime(ts.Add(-evaluationDelay))
	staleMarker := math.Float64frombits(value.StaleNaN)

	for _, s := range g.staleSeries {
		// Rule that produced series no longer configured, mark it stale.
		_, err := app.Append(0, s, markerTime, staleMarker)
		cause := errors.Unwrap(err)
		if cause == nil {
			cause = err
		}
		// Out-of-order, too-old and duplicate rejections are not logged, as
		// they are expected if the series is exposed from a different rule.
		expected := cause == nil ||
			errors.Is(cause, storage.ErrOutOfOrderSample) ||
			errors.Is(cause, storage.ErrTooOldSample) ||
			errors.Is(cause, storage.ErrDuplicateSampleForTimestamp)
		if !expected {
			level.Warn(g.logger).Log("msg", "Adding stale sample for previous configuration failed", "sample", s, "err", err)
		}
	}

	if err := app.Commit(); err != nil {
		level.Warn(g.logger).Log("msg", "Stale sample appending for previous configuration failed", "err", err)
		return
	}
	g.staleSeries = nil
}
// RestoreForState restores the 'for' state of the alerts
// by looking up last ActiveAt from storage.
//
// ts is treated as the current time. For every active alert of every alerting
// rule whose hold duration is at least g.opts.ForGracePeriod, the last float
// sample of the alert's 'for' state series within the outage tolerance window
// is read and used to recompute the alert's ActiveAt. Each alerting rule is
// marked as restored once it has been processed.
func (g *Group) RestoreForState(ts time.Time) {
maxtMS := int64(model.TimeFromUnixNano(ts.UnixNano()))
// We allow restoration only if alerts were active within the outage
// tolerance window before ts, so the querier covers [ts-OutageTolerance, ts].
mint := ts.Add(-g.opts.OutageTolerance)
mintMS := int64(model.TimeFromUnixNano(mint.UnixNano()))
q, err := g.opts.Queryable.Querier(mintMS, maxtMS)
if err != nil {
level.Error(g.logger).Log("msg", "Failed to get Querier", "err", err)
return
}
defer func() {
if err := q.Close(); err != nil {
level.Error(g.logger).Log("msg", "Failed to close Querier", "err", err)
}
}()
for _, rule := range g.Rules() {
alertRule, ok := rule.(*AlertingRule)
if !ok {
continue
}
alertHoldDuration := alertRule.HoldDuration()
if alertHoldDuration < g.opts.ForGracePeriod {
// If alertHoldDuration is already less than grace period, we would not
// like to make it wait for `g.opts.ForGracePeriod` time before firing.
// Hence we skip restoration, which will make it wait for alertHoldDuration.
alertRule.SetRestored(true)
continue
}
alertRule.ForEachActiveAlert(func(a *Alert) {
var s storage.Series
s, err := alertRule.QueryforStateSeries(g.opts.Context, a, q)
if err != nil {
// Querier Warnings are ignored. We do not care unless we have an error.
level.Error(g.logger).Log(
"msg", "Failed to restore 'for' state",
labels.AlertName, alertRule.Name(),
"stage", "Select",
"err", err,
)
return
}
if s == nil {
return
}
// Series found for the 'for' state.
var t int64
var v float64
it := s.Iterator(nil)
// Walk to the end of the leading run of float samples; t and v end up
// holding the last one read and stay zero when there is none.
for it.Next() == chunkenc.ValFloat {
t, v = it.At()
}
if it.Err() != nil {
level.Error(g.logger).Log("msg", "Failed to restore 'for' state",
labels.AlertName, alertRule.Name(), "stage", "Iterator", "err", it.Err())
return
}
if value.IsStaleNaN(v) { // Alert was not active.
return
}
// The sample timestamp is divided by 1000 and used as whole Unix seconds;
// the sample value is used as the Unix seconds of the stored ActiveAt.
downAt := time.Unix(t/1000, 0).UTC()
restoredActiveAt := time.Unix(int64(v), 0).UTC()
timeSpentPending := downAt.Sub(restoredActiveAt)
timeRemainingPending := alertHoldDuration - timeSpentPending
switch {
case timeRemainingPending <= 0:
// It means that alert was firing when prometheus went down.
// In the next Eval, the state of this alert will be set back to
// firing again if it's still firing in that Eval.
// Nothing to be done in this case.
case timeRemainingPending < g.opts.ForGracePeriod:
// (new) restoredActiveAt = (ts + g.opts.ForGracePeriod) - alertHoldDuration
// /* new firing time */ /* moving back by hold duration */
//
// Proof of correctness:
// firingTime = restoredActiveAt.Add(alertHoldDuration)
// = ts + g.opts.ForGracePeriod - alertHoldDuration + alertHoldDuration
// = ts + g.opts.ForGracePeriod
//
// Time remaining to fire = firingTime.Sub(ts)
// = (ts + g.opts.ForGracePeriod) - ts
// = g.opts.ForGracePeriod
restoredActiveAt = ts.Add(g.opts.ForGracePeriod).Add(-alertHoldDuration)
default:
// By shifting ActiveAt to the future (ActiveAt + some_duration),
// the total pending time from the original ActiveAt
// would be `alertHoldDuration + some_duration`.
// Here, some_duration = downDuration.
downDuration := ts.Sub(downAt)
restoredActiveAt = restoredActiveAt.Add(downDuration)
}
a.ActiveAt = restoredActiveAt
level.Debug(g.logger).Log("msg", "'for' state restored",
labels.AlertName, alertRule.Name(), "restored_time", a.ActiveAt.Format(time.RFC850),
"labels", a.Labels.String())
})
alertRule.SetRestored(true)
}
}
// Equals reports whether two groups are the same. Name, file, interval,
// limit, evaluation alignment, the rules (compared pairwise by their string
// form) and the source tenants (compared regardless of order) must all match.
// NOTE(review): evaluationDelay is not part of the comparison — confirm that
// this is intended.
func (g *Group) Equals(ng *Group) bool {
	if g.name != ng.name ||
		g.file != ng.file ||
		g.interval != ng.interval ||
		g.limit != ng.limit ||
		len(g.rules) != len(ng.rules) ||
		g.alignEvaluationTimeOnInterval != ng.alignEvaluationTimeOnInterval {
		return false
	}

	for i := range g.rules {
		if g.rules[i].String() != ng.rules[i].String() {
			return false
		}
	}

	// Compare source tenants on sorted copies, so that neither group's own
	// slice is reordered.
	if len(g.sourceTenants) != len(ng.sourceTenants) {
		return false
	}
	sortedCopy := func(in []string) []string {
		out := append(make([]string, 0, len(in)), in...)
		sort.Strings(out)
		return out
	}
	theirs := sortedCopy(ng.sourceTenants)
	ours := sortedCopy(g.sourceTenants)
	for i, tenant := range theirs {
		if ours[i] != tenant {
			return false
		}
	}
	return true
}
// GroupKey returns the key that identifies a rule group: the file and the
// group name joined by ";". The file is part of the key because group names
// need not be unique across filenames.
func GroupKey(file, name string) string {
	const separator = ";"
	return file + separator + name
}
// Constants for instrumentation.
// namespace is the Namespace given to the rule metrics created in NewGroupMetrics.
const namespace = "prometheus"
// Metrics for rule evaluation.
// The vector metrics carry a single "rule_group" label whose value is the
// group's GroupKey.
type Metrics struct {
// EvalDuration observes how long each single rule evaluation took, in seconds.
EvalDuration prometheus.Summary
// IterationDuration is the duration of rule group evaluations.
IterationDuration prometheus.Summary
// IterationsMissed counts rule group evaluations missed due to slow evaluation.
IterationsMissed *prometheus.CounterVec
// IterationsScheduled counts scheduled rule group evaluations, whether executed or missed.
IterationsScheduled *prometheus.CounterVec
// EvalTotal counts rule evaluations.
EvalTotal *prometheus.CounterVec
// EvalFailures counts rule evaluation failures, including failed commits of rule results.
EvalFailures *prometheus.CounterVec
// GroupInterval is the group's evaluation interval in seconds.
GroupInterval *prometheus.GaugeVec
// GroupLastEvalTime is the time of the last group evaluation, in seconds since the Unix epoch.
GroupLastEvalTime *prometheus.GaugeVec
// GroupLastDuration is the duration of the last group evaluation in seconds.
GroupLastDuration *prometheus.GaugeVec
// GroupRules is the number of rules in the group, set when the group is created.
GroupRules *prometheus.GaugeVec
// GroupSamples is the number of samples returned by the group's rules in the last completed Eval.
GroupSamples *prometheus.GaugeVec
}
// NewGroupMetrics builds the complete set of rule evaluation metrics. When
// reg is non-nil all collectors are registered with it through MustRegister;
// with a nil reg the collectors are returned unregistered.
func NewGroupMetrics(reg prometheus.Registerer) *Metrics {
	// Small constructors so that each metric below only states what differs:
	// its name, help text and (for summaries) its objectives.
	summary := func(name, help string, objectives map[float64]float64) prometheus.Summary {
		return prometheus.NewSummary(prometheus.SummaryOpts{
			Namespace:  namespace,
			Name:       name,
			Help:       help,
			Objectives: objectives,
		})
	}
	counterVec := func(name, help string) *prometheus.CounterVec {
		return prometheus.NewCounterVec(
			prometheus.CounterOpts{Namespace: namespace, Name: name, Help: help},
			[]string{"rule_group"},
		)
	}
	gaugeVec := func(name, help string) *prometheus.GaugeVec {
		return prometheus.NewGaugeVec(
			prometheus.GaugeOpts{Namespace: namespace, Name: name, Help: help},
			[]string{"rule_group"},
		)
	}

	m := &Metrics{
		EvalDuration: summary(
			"rule_evaluation_duration_seconds",
			"The duration for a rule to execute.",
			map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
		),
		IterationDuration: summary(
			"rule_group_duration_seconds",
			"The duration of rule group evaluations.",
			map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
		),
		IterationsMissed: counterVec(
			"rule_group_iterations_missed_total",
			"The total number of rule group evaluations missed due to slow rule group evaluation.",
		),
		IterationsScheduled: counterVec(
			"rule_group_iterations_total",
			"The total number of scheduled rule group evaluations, whether executed or missed.",
		),
		EvalTotal: counterVec(
			"rule_evaluations_total",
			"The total number of rule evaluations.",
		),
		EvalFailures: counterVec(
			"rule_evaluation_failures_total",
			"The total number of rule evaluation failures.",
		),
		GroupInterval: gaugeVec(
			"rule_group_interval_seconds",
			"The interval of a rule group.",
		),
		GroupLastEvalTime: gaugeVec(
			"rule_group_last_evaluation_timestamp_seconds",
			"The timestamp of the last rule group evaluation in seconds.",
		),
		GroupLastDuration: gaugeVec(
			"rule_group_last_duration_seconds",
			"The duration of the last rule group evaluation.",
		),
		GroupRules: gaugeVec(
			"rule_group_rules",
			"The number of rules.",
		),
		GroupSamples: gaugeVec(
			"rule_group_last_evaluation_samples",
			"The number of samples returned during the last rule group evaluation.",
		),
	}

	if reg == nil {
		return m
	}
	reg.MustRegister(
		m.EvalDuration,
		m.IterationDuration,
		m.IterationsMissed,
		m.IterationsScheduled,
		m.EvalTotal,
		m.EvalFailures,
		m.GroupInterval,
		m.GroupLastEvalTime,
		m.GroupLastDuration,
		m.GroupRules,
		m.GroupSamples,
	)
	return m
}

View file

@ -17,9 +17,7 @@ import (
"context"
"errors"
"fmt"
"math"
"net/url"
"sort"
"strings"
"sync"
"time"
@ -27,162 +25,17 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"golang.org/x/exp/slices"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/rulefmt"
"github.com/prometheus/prometheus/model/timestamp"
"github.com/prometheus/prometheus/model/value"
"github.com/prometheus/prometheus/notifier"
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/tsdb/chunkenc"
"github.com/prometheus/prometheus/util/strutil"
)
// RuleHealth describes the health state of a rule.
type RuleHealth string
// The possible health states of a rule based on the last execution.
const (
	// HealthUnknown is presumably the state of a rule that has not been
	// evaluated yet; nothing in this file sets it explicitly.
	HealthUnknown RuleHealth = "unknown"
	// HealthGood is set by Group.Eval after a rule evaluated without error.
	HealthGood RuleHealth = "ok"
	// HealthBad is set by Group.Eval when evaluating the rule or appending
	// its results failed.
	HealthBad RuleHealth = "err"
)
// Constants for instrumentation.
// namespace is the prefix shared by every metric created in NewGroupMetrics.
const namespace = "prometheus"
// Metrics for rule evaluation.
// The two summaries are global; every vector is partitioned by the
// "rule_group" label.
type Metrics struct {
	// Duration for a rule to execute.
	EvalDuration prometheus.Summary
	// Duration of rule group evaluations.
	IterationDuration prometheus.Summary
	// Rule group evaluations missed due to slow rule group evaluation.
	IterationsMissed *prometheus.CounterVec
	// Scheduled rule group evaluations, whether executed or missed.
	IterationsScheduled *prometheus.CounterVec
	// Total number of rule evaluations.
	EvalTotal *prometheus.CounterVec
	// Total number of rule evaluation failures.
	EvalFailures *prometheus.CounterVec
	// Interval of a rule group, in seconds.
	GroupInterval *prometheus.GaugeVec
	// Timestamp of the last rule group evaluation, in seconds.
	GroupLastEvalTime *prometheus.GaugeVec
	// Duration of the last rule group evaluation, in seconds.
	GroupLastDuration *prometheus.GaugeVec
	// Number of rules in the group.
	GroupRules *prometheus.GaugeVec
	// Number of samples returned during the last rule group evaluation.
	GroupSamples *prometheus.GaugeVec
}
// NewGroupMetrics builds the complete set of rule evaluation metrics. When
// reg is non-nil all collectors are registered with it through MustRegister;
// with a nil reg the collectors are returned unregistered.
func NewGroupMetrics(reg prometheus.Registerer) *Metrics {
	// Small constructors so that each metric below only states what differs:
	// its name, help text and (for summaries) its objectives.
	summary := func(name, help string, objectives map[float64]float64) prometheus.Summary {
		return prometheus.NewSummary(prometheus.SummaryOpts{
			Namespace:  namespace,
			Name:       name,
			Help:       help,
			Objectives: objectives,
		})
	}
	counterVec := func(name, help string) *prometheus.CounterVec {
		return prometheus.NewCounterVec(
			prometheus.CounterOpts{Namespace: namespace, Name: name, Help: help},
			[]string{"rule_group"},
		)
	}
	gaugeVec := func(name, help string) *prometheus.GaugeVec {
		return prometheus.NewGaugeVec(
			prometheus.GaugeOpts{Namespace: namespace, Name: name, Help: help},
			[]string{"rule_group"},
		)
	}

	m := &Metrics{
		EvalDuration: summary(
			"rule_evaluation_duration_seconds",
			"The duration for a rule to execute.",
			map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
		),
		IterationDuration: summary(
			"rule_group_duration_seconds",
			"The duration of rule group evaluations.",
			map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
		),
		IterationsMissed: counterVec(
			"rule_group_iterations_missed_total",
			"The total number of rule group evaluations missed due to slow rule group evaluation.",
		),
		IterationsScheduled: counterVec(
			"rule_group_iterations_total",
			"The total number of scheduled rule group evaluations, whether executed or missed.",
		),
		EvalTotal: counterVec(
			"rule_evaluations_total",
			"The total number of rule evaluations.",
		),
		EvalFailures: counterVec(
			"rule_evaluation_failures_total",
			"The total number of rule evaluation failures.",
		),
		GroupInterval: gaugeVec(
			"rule_group_interval_seconds",
			"The interval of a rule group.",
		),
		GroupLastEvalTime: gaugeVec(
			"rule_group_last_evaluation_timestamp_seconds",
			"The timestamp of the last rule group evaluation in seconds.",
		),
		GroupLastDuration: gaugeVec(
			"rule_group_last_duration_seconds",
			"The duration of the last rule group evaluation.",
		),
		GroupRules: gaugeVec(
			"rule_group_rules",
			"The number of rules.",
		),
		GroupSamples: gaugeVec(
			"rule_group_last_evaluation_samples",
			"The number of samples returned during the last rule group evaluation.",
		),
	}

	if reg == nil {
		return m
	}
	reg.MustRegister(
		m.EvalDuration,
		m.IterationDuration,
		m.IterationsMissed,
		m.IterationsScheduled,
		m.EvalTotal,
		m.EvalFailures,
		m.GroupInterval,
		m.GroupLastEvalTime,
		m.GroupLastDuration,
		m.GroupRules,
		m.GroupSamples,
	)
	return m
}
// QueryFunc processes PromQL queries.
// It takes the query string q and the evaluation time t, and returns the
// resulting vector or an error.
type QueryFunc func(ctx context.Context, q string, t time.Time) (promql.Vector, error)
@ -214,256 +67,6 @@ func EngineQueryFunc(engine *promql.Engine, q storage.Queryable) QueryFunc {
}
}
// A Rule encapsulates a vector expression which is evaluated at a specified
// interval and acted upon (currently either recorded or used for alerting).
type Rule interface {
	// Name returns the name of the rule.
	Name() string
	// Labels of the rule.
	Labels() labels.Labels
	// Eval evaluates the rule, including any associated recording or alerting actions.
	// The duration passed is the evaluation delay.
	// Group.Eval passes, in order: the context, the evaluation delay, the
	// evaluation timestamp, the query function, the external URL and the
	// group's limit.
	Eval(context.Context, time.Duration, time.Time, QueryFunc, *url.URL, int) (promql.Vector, error)
	// String returns a human-readable string representation of the rule.
	String() string
	// Query returns the rule query expression.
	Query() parser.Expr
	// SetLastError sets the current error experienced by the rule.
	SetLastError(error)
	// LastError returns the last error experienced by the rule.
	LastError() error
	// SetHealth sets the current health of the rule.
	SetHealth(RuleHealth)
	// Health returns the current health of the rule.
	Health() RuleHealth
	// SetEvaluationDuration records how long the last evaluation took.
	SetEvaluationDuration(time.Duration)
	// GetEvaluationDuration returns last evaluation duration.
	// NOTE: Used dynamically by rules.html template.
	GetEvaluationDuration() time.Duration
	// SetEvaluationTimestamp records when the last evaluation started.
	SetEvaluationTimestamp(time.Time)
	// GetEvaluationTimestamp returns last evaluation timestamp.
	// NOTE: Used dynamically by rules.html template.
	GetEvaluationTimestamp() time.Time
}
// Group is a set of rules that have a logical relation.
type Group struct {
	// name and file together identify the group (see GroupKey).
	name string
	file string
	// interval is the period between two evaluations of the group.
	interval time.Duration
	// evaluationDelay, when non-nil, overrides opts.DefaultEvaluationDelay
	// (see EvaluationDelay).
	evaluationDelay *time.Duration
	// limit is handed to every rule's Eval call.
	limit int
	rules []Rule
	sourceTenants []string
	seriesInPreviousEval []map[string]labels.Labels // One per Rule.
	// staleSeries holds series that still need a stale marker written by
	// cleanupStaleSeries.
	staleSeries []labels.Labels
	opts *ManagerOptions
	// mtx guards evaluationTime, lastEvaluation and lastEvalTimestamp; it is
	// also held while AlertingRules/HasAlertingRules walk the rule list.
	mtx sync.Mutex
	evaluationTime time.Duration
	lastEvaluation time.Time // Wall-clock time of most recent evaluation.
	lastEvalTimestamp time.Time // Time slot used for most recent evaluation.
	// shouldRestore makes run call RestoreForState once, after the second
	// evaluation has completed.
	shouldRestore bool
	// markStale makes run schedule stale markers for the group's series when
	// it exits.
	markStale bool
	// done is closed by stop to ask run to exit; terminated is closed by run
	// when it has returned; managerDone comes from GroupOptions.done.
	done chan struct{}
	terminated chan struct{}
	managerDone chan struct{}
	logger log.Logger
	metrics *Metrics
	// Rule group evaluation iteration function,
	// defaults to DefaultEvalIterationFunc.
	evalIterationFunc GroupEvalIterationFunc
	// alignEvaluationTimeOnInterval disables the per-group hash offset in
	// EvalTimestamp, so evaluations land on exact multiples of interval.
	alignEvaluationTimeOnInterval bool
}
// GroupEvalIterationFunc is used to implement and extend rule group
// evaluation iteration logic. It is configured in Group.evalIterationFunc,
// and periodically invoked at each group evaluation interval to
// evaluate the rules in the group at that point in time.
// DefaultEvalIterationFunc is the default implementation.
type GroupEvalIterationFunc func(ctx context.Context, g *Group, evalTimestamp time.Time)
// GroupOptions carries the settings NewGroup copies into a new Group.
type GroupOptions struct {
	Name, File string
	Interval time.Duration
	Limit int
	Rules []Rule
	SourceTenants []string
	ShouldRestore bool
	// Opts must be non-nil: NewGroup reads Metrics, Registerer and Logger
	// from it.
	Opts *ManagerOptions
	// EvaluationDelay may be nil, in which case the group falls back to
	// Opts.DefaultEvaluationDelay (see Group.EvaluationDelay).
	EvaluationDelay *time.Duration
	// done becomes the group's managerDone channel.
	done chan struct{}
	// EvalIterationFunc may be nil; NewGroup then uses DefaultEvalIterationFunc.
	EvalIterationFunc GroupEvalIterationFunc
	AlignEvaluationTimeOnInterval bool
}
// NewGroup builds a Group from the given options. If the manager options
// carry no Metrics, a fresh set is created (and registered with
// Opts.Registerer); if no evaluation iteration function is given,
// DefaultEvalIterationFunc is used.
func NewGroup(o GroupOptions) *Group {
	m := o.Opts.Metrics
	if m == nil {
		m = NewGroupMetrics(o.Opts.Registerer)
	}

	// Touch every per-group series so it exists from the start, and publish
	// the two values that are already known: rule count and interval.
	groupKey := GroupKey(o.File, o.Name)
	m.IterationsMissed.WithLabelValues(groupKey)
	m.IterationsScheduled.WithLabelValues(groupKey)
	m.EvalTotal.WithLabelValues(groupKey)
	m.EvalFailures.WithLabelValues(groupKey)
	m.GroupLastEvalTime.WithLabelValues(groupKey)
	m.GroupLastDuration.WithLabelValues(groupKey)
	m.GroupRules.WithLabelValues(groupKey).Set(float64(len(o.Rules)))
	m.GroupSamples.WithLabelValues(groupKey)
	m.GroupInterval.WithLabelValues(groupKey).Set(o.Interval.Seconds())

	iterate := o.EvalIterationFunc
	if iterate == nil {
		iterate = DefaultEvalIterationFunc
	}

	g := &Group{
		name:                          o.Name,
		file:                          o.File,
		interval:                      o.Interval,
		evaluationDelay:               o.EvaluationDelay,
		limit:                         o.Limit,
		rules:                         o.Rules,
		shouldRestore:                 o.ShouldRestore,
		opts:                          o.Opts,
		sourceTenants:                 o.SourceTenants,
		seriesInPreviousEval:          make([]map[string]labels.Labels, len(o.Rules)),
		done:                          make(chan struct{}),
		managerDone:                   o.done,
		terminated:                    make(chan struct{}),
		logger:                        log.With(o.Opts.Logger, "file", o.File, "group", o.Name),
		metrics:                       m,
		evalIterationFunc:             iterate,
		alignEvaluationTimeOnInterval: o.AlignEvaluationTimeOnInterval,
	}
	return g
}
// Name returns the name of the group.
func (g *Group) Name() string {
	return g.name
}

// File returns the file the group belongs to.
func (g *Group) File() string {
	return g.file
}

// Rules returns the rules of the group.
func (g *Group) Rules() []Rule {
	return g.rules
}

// Queryable returns the queryable configured in the group's manager options.
func (g *Group) Queryable() storage.Queryable {
	return g.opts.Queryable
}

// Context returns the context configured in the group's manager options.
func (g *Group) Context() context.Context {
	return g.opts.Context
}

// Interval returns the evaluation interval of the group.
func (g *Group) Interval() time.Duration {
	return g.interval
}

// Limit returns the limit of the group.
func (g *Group) Limit() int {
	return g.limit
}

// SourceTenants returns the source tenants for the group.
// If it's empty or nil, then the owning user/tenant is considered to be the source tenant.
func (g *Group) SourceTenants() []string {
	return g.sourceTenants
}

// Logger returns the logger of the group.
func (g *Group) Logger() log.Logger {
	return g.logger
}
// run is the evaluation loop of the group. It waits for the group's first
// evaluation slot, then invokes evalIterationFunc once per tick until g.done
// is closed. g.terminated is closed when run returns. If shouldRestore is
// set, the 'for' state of alerts is restored after the second evaluation.
func (g *Group) run(ctx context.Context) {
	defer close(g.terminated)
	// Wait an initial amount to have consistently slotted intervals.
	evalTimestamp := g.EvalTimestamp(time.Now().UnixNano()).Add(g.interval)
	select {
	case <-time.After(time.Until(evalTimestamp)):
	case <-g.done:
		return
	}
	// Attach the group's file and name to the context as query origin.
	ctx = promql.NewOriginContext(ctx, map[string]interface{}{
		"ruleGroup": map[string]string{
			"file": g.File(),
			"name": g.Name(),
		},
	})
	// The assumption here is that since the ticker was started after having
	// waited for `evalTimestamp` to pass, the ticks will trigger soon
	// after each `evalTimestamp + N * g.interval` occurrence.
	tick := time.NewTicker(g.interval)
	defer tick.Stop()
	// On exit, if the group was flagged with markStale, hand all series of
	// the last evaluation over to staleSeries and write their stale markers
	// from a background goroutine.
	defer func() {
		if !g.markStale {
			return
		}
		go func(now time.Time) {
			for _, rule := range g.seriesInPreviousEval {
				for _, r := range rule {
					g.staleSeries = append(g.staleSeries, r)
				}
			}
			// That can be garbage collected at this point.
			g.seriesInPreviousEval = nil
			// Wait for 2 intervals to give the opportunity to renamed rules
			// to insert new series in the tsdb. At this point if there is a
			// renamed rule, it should already be started.
			select {
			case <-g.managerDone:
			case <-time.After(2 * g.interval):
				g.cleanupStaleSeries(ctx, now)
			}
		}(time.Now())
	}()
	g.evalIterationFunc(ctx, g, evalTimestamp)
	if g.shouldRestore {
		// If we have to restore, we wait for another Eval to finish.
		// The reason behind this is, during first eval (or before it)
		// we might not have enough data scraped, and recording rules would not
		// have updated the latest values, on which some alerts might depend.
		select {
		case <-g.done:
			return
		case <-tick.C:
			// Whole intervals that elapsed beyond the expected one count as
			// missed iterations.
			missed := (time.Since(evalTimestamp) / g.interval) - 1
			if missed > 0 {
				g.metrics.IterationsMissed.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed))
				g.metrics.IterationsScheduled.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed))
			}
			evalTimestamp = evalTimestamp.Add((missed + 1) * g.interval)
			g.evalIterationFunc(ctx, g, evalTimestamp)
		}
		g.RestoreForState(time.Now())
		g.shouldRestore = false
	}
	for {
		// Check g.done on its own first so that a stop request is honoured
		// even when a tick is ready at the same time.
		select {
		case <-g.done:
			return
		default:
			select {
			case <-g.done:
				return
			case <-tick.C:
				// Same missed-iteration accounting as above; evalTimestamp
				// jumps ahead over the skipped slots.
				missed := (time.Since(evalTimestamp) / g.interval) - 1
				if missed > 0 {
					g.metrics.IterationsMissed.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed))
					g.metrics.IterationsScheduled.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed))
				}
				evalTimestamp = evalTimestamp.Add((missed + 1) * g.interval)
				g.evalIterationFunc(ctx, g, evalTimestamp)
			}
		}
	}
}
// DefaultEvalIterationFunc is the default implementation of
// GroupEvalIterationFunc that is periodically invoked to evaluate the rules
// in a group at a given point in time and updates Group state and metrics
@ -483,536 +86,6 @@ func DefaultEvalIterationFunc(ctx context.Context, g *Group, evalTimestamp time.
g.setLastEvalTimestamp(evalTimestamp)
}
// stop asks the run loop to exit by closing g.done and blocks until run has
// returned (signalled through g.terminated).
func (g *Group) stop() {
	close(g.done)
	<-g.terminated
}
// hash returns the hash of a label set made of the group's name and file.
// EvalTimestamp uses it to derive the group's evaluation offset.
func (g *Group) hash() uint64 {
	nameLabel := labels.Label{Name: "name", Value: g.name}
	fileLabel := labels.Label{Name: "file", Value: g.file}
	return labels.New(nameLabel, fileLabel).Hash()
}
// AlertingRules returns the alerting rules of the group, ordered by
// descending state value and, within the same state, by ascending name.
// The result is nil when the group holds no alerting rule.
func (g *Group) AlertingRules() []*AlertingRule {
	g.mtx.Lock()
	defer g.mtx.Unlock()

	var result []*AlertingRule
	for idx := range g.rules {
		ar, isAlerting := g.rules[idx].(*AlertingRule)
		if !isAlerting {
			continue
		}
		result = append(result, ar)
	}

	slices.SortFunc(result, func(x, y *AlertingRule) int {
		if x.State() != y.State() {
			return int(y.State() - x.State())
		}
		return strings.Compare(x.Name(), y.Name())
	})
	return result
}
// HasAlertingRules reports whether at least one rule of the group is an
// AlertingRule.
func (g *Group) HasAlertingRules() bool {
	g.mtx.Lock()
	defer g.mtx.Unlock()

	found := false
	for idx := 0; idx < len(g.rules) && !found; idx++ {
		_, found = g.rules[idx].(*AlertingRule)
	}
	return found
}
// GetEvaluationTime returns how long the last evaluation of the rule group
// took.
func (g *Group) GetEvaluationTime() time.Duration {
	g.mtx.Lock()
	took := g.evaluationTime
	g.mtx.Unlock()
	return took
}
// setEvaluationTime records how long the last evaluation took, both in the
// group's last-duration gauge (in seconds) and on the group itself.
func (g *Group) setEvaluationTime(dur time.Duration) {
	key := GroupKey(g.file, g.name)
	g.metrics.GroupLastDuration.WithLabelValues(key).Set(dur.Seconds())

	g.mtx.Lock()
	g.evaluationTime = dur
	g.mtx.Unlock()
}
// GetLastEvaluation returns the wall-clock time at which the rule group was
// last evaluated.
func (g *Group) GetLastEvaluation() time.Time {
	g.mtx.Lock()
	last := g.lastEvaluation
	g.mtx.Unlock()
	return last
}
// setLastEvaluation records ts as the time of the group's last evaluation,
// both in the last-evaluation-time gauge (as seconds) and in lastEvaluation.
func (g *Group) setLastEvaluation(ts time.Time) {
	key := GroupKey(g.file, g.name)
	seconds := float64(ts.UnixNano()) / 1e9
	g.metrics.GroupLastEvalTime.WithLabelValues(key).Set(seconds)

	g.mtx.Lock()
	g.lastEvaluation = ts
	g.mtx.Unlock()
}
// GetLastEvalTimestamp returns the time slot that was used for the most
// recent evaluation.
func (g *Group) GetLastEvalTimestamp() time.Time {
	g.mtx.Lock()
	slot := g.lastEvalTimestamp
	g.mtx.Unlock()
	return slot
}
// setLastEvalTimestamp stores ts as the time slot of the most recent
// evaluation.
func (g *Group) setLastEvalTimestamp(ts time.Time) {
	g.mtx.Lock()
	g.lastEvalTimestamp = ts
	g.mtx.Unlock()
}
// EvalTimestamp returns the group's most recent evaluation slot that is not
// after startTime (nanoseconds since the Unix epoch), as a UTC time.
//
// Slots are spaced by the group's interval. Unless
// alignEvaluationTimeOnInterval is set, they are shifted away from exact
// multiples of the interval by an offset derived from the group's hash.
func (g *Group) EvalTimestamp(startTime int64) time.Time {
	interval := int64(g.interval)

	offset := int64(0)
	if !g.alignEvaluationTimeOnInterval {
		offset = int64(g.hash() % uint64(g.interval))
	}

	// `% interval` can only align to exact multiples of the interval, so the
	// offset is removed before aligning and added back afterwards. Removing
	// it first guarantees the result never lies after startTime.
	shifted := startTime - offset
	aligned := shifted - shifted%interval
	return time.Unix(0, aligned+offset).UTC()
}
// nameAndLabels returns the rule's name immediately followed by the string
// form of its labels. CopyState uses the result as the key for matching
// rules between two groups.
func nameAndLabels(rule Rule) string {
	name := rule.Name()
	lbls := rule.Labels().String()
	return name + lbls
}
// CopyState copies the alerting rule and staleness related state from the given group.
//
// Rules are matched based on their name and labels. If there are duplicates, the
// first is matched with the first, second with the second etc.
//
// Series of rules in from that find no match in g are added to g.staleSeries.
func (g *Group) CopyState(from *Group) {
	g.evaluationTime = from.evaluationTime
	g.lastEvaluation = from.lastEvaluation
	// ruleMap maps a rule key to the indexes in from.rules that carry it and
	// have not been matched yet, in their original order.
	ruleMap := make(map[string][]int, len(from.rules))
	for fi, fromRule := range from.rules {
		nameAndLabels := nameAndLabels(fromRule)
		l := ruleMap[nameAndLabels]
		ruleMap[nameAndLabels] = append(l, fi)
	}
	for i, rule := range g.rules {
		nameAndLabels := nameAndLabels(rule)
		indexes := ruleMap[nameAndLabels]
		if len(indexes) == 0 {
			continue
		}
		// Consume the first unmatched rule with this key.
		fi := indexes[0]
		g.seriesInPreviousEval[i] = from.seriesInPreviousEval[fi]
		ruleMap[nameAndLabels] = indexes[1:]
		// Active alerts are only carried over when both the new and the old
		// rule are alerting rules.
		ar, ok := rule.(*AlertingRule)
		if !ok {
			continue
		}
		far, ok := from.rules[fi].(*AlertingRule)
		if !ok {
			continue
		}
		for fp, a := range far.active {
			ar.active[fp] = a
		}
	}
	// Handle deleted and unmatched duplicate rules.
	g.staleSeries = from.staleSeries
	for fi, fromRule := range from.rules {
		nameAndLabels := nameAndLabels(fromRule)
		l := ruleMap[nameAndLabels]
		// A non-empty remainder means rules with this key were left
		// unmatched above.
		if len(l) != 0 {
			for _, series := range from.seriesInPreviousEval[fi] {
				g.staleSeries = append(g.staleSeries, series)
			}
		}
	}
}
// Eval runs a single evaluation cycle in which all rules are evaluated sequentially.
//
// ts is the evaluation timestamp handed to every rule. For each rule, Eval
// evaluates it inside its own tracing span, updates the rule's health, last
// error, evaluation duration and timestamp, sends alerts for alerting rules,
// appends the resulting samples, and writes stale markers for series the rule
// returned in the previous cycle but not in this one. The cycle is abandoned
// as soon as g.done is closed. Afterwards the group's sample gauge is updated
// and pending stale series are cleaned up.
func (g *Group) Eval(ctx context.Context, ts time.Time) {
	var samplesTotal float64
	evaluationDelay := g.EvaluationDelay()
	for i, rule := range g.rules {
		select {
		case <-g.done:
			return
		default:
		}

		// Each rule runs in its own function so the deferred calls (span end,
		// bookkeeping, commit) fire per rule rather than at the end of Eval.
		func(i int, rule Rule) {
			logger := log.WithPrefix(g.logger, "name", rule.Name(), "index", i)
			ctx, sp := otel.Tracer("").Start(ctx, "rule")
			sp.SetAttributes(attribute.String("name", rule.Name()))
			defer func(t time.Time) {
				sp.End()

				since := time.Since(t)
				g.metrics.EvalDuration.Observe(since.Seconds())
				rule.SetEvaluationDuration(since)
				rule.SetEvaluationTimestamp(t)
			}(time.Now())

			if sp.SpanContext().IsSampled() && sp.SpanContext().HasTraceID() {
				// Extend the per-rule logger rather than g.logger so that the
				// "name" and "index" fields are kept next to the trace ID.
				logger = log.WithPrefix(logger, "traceID", sp.SpanContext().TraceID())
			}

			g.metrics.EvalTotal.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()

			vector, err := rule.Eval(ctx, evaluationDelay, ts, g.opts.QueryFunc, g.opts.ExternalURL, g.Limit())
			if err != nil {
				rule.SetHealth(HealthBad)
				rule.SetLastError(err)
				sp.SetStatus(codes.Error, err.Error())
				g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()

				// Canceled queries are intentional termination of queries. This normally
				// happens on shutdown and thus we skip logging of any errors here.
				var eqc promql.ErrQueryCanceled
				if !errors.As(err, &eqc) {
					level.Warn(logger).Log("msg", "Evaluating rule failed", "rule", rule, "err", err)
				}
				return
			}
			rule.SetHealth(HealthGood)
			rule.SetLastError(nil)
			samplesTotal += float64(len(vector))

			if ar, ok := rule.(*AlertingRule); ok {
				ar.sendAlerts(ctx, ts, g.opts.ResendDelay, g.interval, g.opts.NotifyFunc)
			}
			var (
				numOutOfOrder = 0
				numTooOld     = 0
				numDuplicates = 0
			)

			app := g.opts.Appendable.Appender(ctx)
			seriesReturned := make(map[string]labels.Labels, len(g.seriesInPreviousEval[i]))
			// Commit once everything below has been appended. Only a
			// successful commit replaces the remembered series of this rule.
			defer func() {
				if err := app.Commit(); err != nil {
					rule.SetHealth(HealthBad)
					rule.SetLastError(err)
					sp.SetStatus(codes.Error, err.Error())
					g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()

					level.Warn(logger).Log("msg", "Rule sample appending failed", "err", err)
					return
				}
				g.seriesInPreviousEval[i] = seriesReturned
			}()

			for _, s := range vector {
				if s.H != nil {
					_, err = app.AppendHistogram(0, s.Metric, s.T, nil, s.H)
				} else {
					_, err = app.Append(0, s.Metric, s.T, s.F)
				}

				if err != nil {
					rule.SetHealth(HealthBad)
					rule.SetLastError(err)
					sp.SetStatus(codes.Error, err.Error())
					unwrappedErr := errors.Unwrap(err)
					if unwrappedErr == nil {
						unwrappedErr = err
					}
					switch {
					case errors.Is(unwrappedErr, storage.ErrOutOfOrderSample):
						numOutOfOrder++
						level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
					case errors.Is(unwrappedErr, storage.ErrTooOldSample):
						numTooOld++
						level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
					case errors.Is(unwrappedErr, storage.ErrDuplicateSampleForTimestamp):
						numDuplicates++
						level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
					default:
						level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
					}
				} else {
					// Remember the successfully appended series, keyed by the
					// byte form of its label set.
					buf := [1024]byte{}
					seriesReturned[string(s.Metric.Bytes(buf[:]))] = s.Metric
				}
			}
			if numOutOfOrder > 0 {
				level.Warn(logger).Log("msg", "Error on ingesting out-of-order result from rule evaluation", "numDropped", numOutOfOrder)
			}
			if numTooOld > 0 {
				level.Warn(logger).Log("msg", "Error on ingesting too old result from rule evaluation", "numDropped", numTooOld)
			}
			if numDuplicates > 0 {
				level.Warn(logger).Log("msg", "Error on ingesting results from rule evaluation with different value but same timestamp", "numDropped", numDuplicates)
			}

			for metric, lset := range g.seriesInPreviousEval[i] {
				if _, ok := seriesReturned[metric]; !ok {
					// Series no longer exposed, mark it stale.
					_, err = app.Append(0, lset, timestamp.FromTime(ts.Add(-evaluationDelay)), math.Float64frombits(value.StaleNaN))
					unwrappedErr := errors.Unwrap(err)
					if unwrappedErr == nil {
						unwrappedErr = err
					}
					switch {
					case unwrappedErr == nil:
					case errors.Is(unwrappedErr, storage.ErrOutOfOrderSample),
						errors.Is(unwrappedErr, storage.ErrTooOldSample),
						errors.Is(unwrappedErr, storage.ErrDuplicateSampleForTimestamp):
						// Do not count these in logging, as this is expected if series
						// is exposed from a different rule.
					default:
						level.Warn(logger).Log("msg", "Adding stale sample failed", "sample", lset.String(), "err", err)
					}
				}
			}
		}(i, rule)
	}
	if g.metrics != nil {
		g.metrics.GroupSamples.WithLabelValues(GroupKey(g.File(), g.Name())).Set(samplesTotal)
	}
	g.cleanupStaleSeries(ctx, ts)
}
// EvaluationDelay returns the evaluation delay that applies to the group:
// the group's own setting if present, otherwise the manager-wide default if
// one is configured, otherwise zero.
func (g *Group) EvaluationDelay() time.Duration {
	switch {
	case g.evaluationDelay != nil:
		return *g.evaluationDelay
	case g.opts.DefaultEvaluationDelay != nil:
		return g.opts.DefaultEvaluationDelay()
	default:
		return time.Duration(0)
	}
}
// cleanupStaleSeries writes a stale marker, timestamped at ts minus the
// evaluation delay, for every series in g.staleSeries. The list is cleared
// only after the markers were committed successfully, so a failed commit is
// retried on the next call.
func (g *Group) cleanupStaleSeries(ctx context.Context, ts time.Time) {
	if len(g.staleSeries) == 0 {
		return
	}

	app := g.opts.Appendable.Appender(ctx)
	staleAt := timestamp.FromTime(ts.Add(-g.EvaluationDelay()))
	staleMarker := math.Float64frombits(value.StaleNaN)

	for _, s := range g.staleSeries {
		// Rule that produced series no longer configured, mark it stale.
		_, err := app.Append(0, s, staleAt, staleMarker)

		cause := errors.Unwrap(err)
		if cause == nil {
			cause = err
		}
		// Out-of-order, too-old and duplicate errors are expected if the
		// series is exposed from a different rule, so they are not logged.
		expected := cause == nil ||
			errors.Is(cause, storage.ErrOutOfOrderSample) ||
			errors.Is(cause, storage.ErrTooOldSample) ||
			errors.Is(cause, storage.ErrDuplicateSampleForTimestamp)
		if !expected {
			level.Warn(g.logger).Log("msg", "Adding stale sample for previous configuration failed", "sample", s, "err", err)
		}
	}

	if err := app.Commit(); err != nil {
		level.Warn(g.logger).Log("msg", "Stale sample appending for previous configuration failed", "err", err)
		return
	}
	g.staleSeries = nil
}
// RestoreForState restores the 'for' state of the alerts
// by looking up last ActiveAt from storage.
//
// ts is the restoration time. Only the window of g.opts.OutageTolerance
// before ts is queried. Every alerting rule of the group is marked as
// restored, including rules whose restoration is skipped.
func (g *Group) RestoreForState(ts time.Time) {
	maxtMS := int64(model.TimeFromUnixNano(ts.UnixNano()))
	// We allow restoration only if alerts were active after a certain time,
	// i.e. within the outage tolerance before ts.
	mint := ts.Add(-g.opts.OutageTolerance)
	mintMS := int64(model.TimeFromUnixNano(mint.UnixNano()))
	q, err := g.opts.Queryable.Querier(mintMS, maxtMS)
	if err != nil {
		level.Error(g.logger).Log("msg", "Failed to get Querier", "err", err)
		return
	}
	defer func() {
		if err := q.Close(); err != nil {
			level.Error(g.logger).Log("msg", "Failed to close Querier", "err", err)
		}
	}()
	for _, rule := range g.Rules() {
		alertRule, ok := rule.(*AlertingRule)
		if !ok {
			continue
		}
		alertHoldDuration := alertRule.HoldDuration()
		if alertHoldDuration < g.opts.ForGracePeriod {
			// If alertHoldDuration is already less than grace period, we would not
			// like to make it wait for `g.opts.ForGracePeriod` time before firing.
			// Hence we skip restoration, which will make it wait for alertHoldDuration.
			alertRule.SetRestored(true)
			continue
		}
		alertRule.ForEachActiveAlert(func(a *Alert) {
			var s storage.Series
			s, err := alertRule.QueryforStateSeries(g.opts.Context, a, q)
			if err != nil {
				// Querier Warnings are ignored. We do not care unless we have an error.
				level.Error(g.logger).Log(
					"msg", "Failed to restore 'for' state",
					labels.AlertName, alertRule.Name(),
					"stage", "Select",
					"err", err,
				)
				return
			}
			if s == nil {
				return
			}
			// Series found for the 'for' state.
			var t int64
			var v float64
			// Walk to the end of the series; t and v end up holding the last
			// float sample.
			it := s.Iterator(nil)
			for it.Next() == chunkenc.ValFloat {
				t, v = it.At()
			}
			if it.Err() != nil {
				level.Error(g.logger).Log("msg", "Failed to restore 'for' state",
					labels.AlertName, alertRule.Name(), "stage", "Iterator", "err", it.Err())
				return
			}
			if value.IsStaleNaN(v) { // Alert was not active.
				return
			}
			// The sample timestamp is in milliseconds and is taken as the
			// moment the alert state was last recorded; the sample value is
			// the ActiveAt time in Unix seconds.
			downAt := time.Unix(t/1000, 0).UTC()
			restoredActiveAt := time.Unix(int64(v), 0).UTC()
			timeSpentPending := downAt.Sub(restoredActiveAt)
			timeRemainingPending := alertHoldDuration - timeSpentPending
			switch {
			case timeRemainingPending <= 0:
				// It means that alert was firing when prometheus went down.
				// In the next Eval, the state of this alert will be set back to
				// firing again if it's still firing in that Eval.
				// Nothing to be done in this case.
			case timeRemainingPending < g.opts.ForGracePeriod:
				// (new) restoredActiveAt = (ts + g.opts.ForGracePeriod) - alertHoldDuration
				// /* new firing time */ /* moving back by hold duration */
				//
				// Proof of correctness:
				// firingTime = restoredActiveAt.Add(alertHoldDuration)
				// = ts + g.opts.ForGracePeriod - alertHoldDuration + alertHoldDuration
				// = ts + g.opts.ForGracePeriod
				//
				// Time remaining to fire = firingTime.Sub(ts)
				// = (ts + g.opts.ForGracePeriod) - ts
				// = g.opts.ForGracePeriod
				restoredActiveAt = ts.Add(g.opts.ForGracePeriod).Add(-alertHoldDuration)
			default:
				// By shifting ActiveAt to the future (ActiveAt + some_duration),
				// the total pending time from the original ActiveAt
				// would be `alertHoldDuration + some_duration`.
				// Here, some_duration = downDuration.
				downDuration := ts.Sub(downAt)
				restoredActiveAt = restoredActiveAt.Add(downDuration)
			}
			a.ActiveAt = restoredActiveAt
			level.Debug(g.logger).Log("msg", "'for' state restored",
				labels.AlertName, alertRule.Name(), "restored_time", a.ActiveAt.Format(time.RFC850),
				"labels", a.Labels.String())
		})
		alertRule.SetRestored(true)
	}
}
// Equals reports whether g and ng describe the same group: identical name,
// file, interval, limit and interval alignment, the same rules in the same
// order (compared through their String form), and the same source tenants
// regardless of the order in which they are listed.
func (g *Group) Equals(ng *Group) bool {
	// Cheap scalar and length comparisons first; any mismatch settles it.
	switch {
	case g.name != ng.name,
		g.file != ng.file,
		g.interval != ng.interval,
		g.limit != ng.limit,
		len(g.rules) != len(ng.rules),
		g.alignEvaluationTimeOnInterval != ng.alignEvaluationTimeOnInterval,
		len(g.sourceTenants) != len(ng.sourceTenants):
		return false
	}

	// Rules are positional: rule i must render identically in both groups.
	for idx := range g.rules {
		if g.rules[idx].String() != ng.rules[idx].String() {
			return false
		}
	}

	// Tenants are compared order-insensitively, so sort private copies and
	// leave the groups' own slices untouched.
	sortedCopy := func(in []string) []string {
		out := make([]string, len(in))
		copy(out, in)
		sort.Strings(out)
		return out
	}
	mine, theirs := sortedCopy(g.sourceTenants), sortedCopy(ng.sourceTenants)
	for idx, tenant := range theirs {
		if mine[idx] != tenant {
			return false
		}
	}
	return true
}
// The Manager manages recording and alerting rules.
type Manager struct {
opts *ManagerOptions
@ -1270,11 +343,6 @@ func (m *Manager) LoadGroups(
return groups, nil
}
// GroupKey returns the identifier of a rule group: the file it was loaded
// from and its name, joined by ";". Group names need not be unique across
// files, which is why the file is part of the key.
func GroupKey(file, name string) string {
	const separator = ";"
	return file + separator + name
}
// RuleGroups returns the list of manager's rule groups.
func (m *Manager) RuleGroups() []*Group {
m.mtx.RLock()

65
rules/rule.go Normal file
View file

@ -0,0 +1,65 @@
// Copyright 2013 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package rules
import (
"context"
"net/url"
"time"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/promql/parser"
)
// RuleHealth describes the health state of a rule.
type RuleHealth string
// The possible health states of a rule based on the last execution.
const (
	// HealthUnknown is presumably the state of a rule that has not been
	// evaluated yet; confirm against the rule implementations.
	HealthUnknown RuleHealth = "unknown"
	// HealthGood marks a rule whose last execution succeeded.
	HealthGood RuleHealth = "ok"
	// HealthBad marks a rule whose last execution failed.
	HealthBad RuleHealth = "err"
)
// A Rule encapsulates a vector expression which is evaluated at a specified
// interval and acted upon (currently either recorded or used for alerting).
type Rule interface {
// Name returns the name of the rule.
Name() string
// Labels of the rule.
Labels() labels.Labels
// Eval evaluates the rule, including any associated recording or alerting actions.
// The duration passed is the evaluation delay.
// NOTE(review): the remaining unnamed parameters are presumably the evaluation
// timestamp, the query function, the external URL and a result limit — confirm
// against the implementations.
Eval(context.Context, time.Duration, time.Time, QueryFunc, *url.URL, int) (promql.Vector, error)
// String returns a human-readable string representation of the rule.
String() string
// Query returns the rule query expression.
Query() parser.Expr
// SetLastError sets the current error experienced by the rule.
SetLastError(error)
// LastError returns the last error experienced by the rule.
LastError() error
// SetHealth sets the current health of the rule.
SetHealth(RuleHealth)
// Health returns the current health of the rule.
Health() RuleHealth
// SetEvaluationDuration sets the duration of the last evaluation, the
// counterpart of GetEvaluationDuration.
SetEvaluationDuration(time.Duration)
// GetEvaluationDuration returns last evaluation duration.
// NOTE: Used dynamically by rules.html template.
GetEvaluationDuration() time.Duration
// SetEvaluationTimestamp sets the timestamp of the last evaluation, the
// counterpart of GetEvaluationTimestamp.
SetEvaluationTimestamp(time.Time)
// GetEvaluationTimestamp returns last evaluation timestamp.
// NOTE: Used dynamically by rules.html template.
GetEvaluationTimestamp() time.Time
}

View file

@ -34,80 +34,20 @@ import (
"github.com/prometheus/prometheus/util/osutil"
)
// targetMetadataCache is the package-level collector for the metadata cache
// metrics. It is created without a scrape manager; the manager it reports on
// is attached later through registerManager.
var targetMetadataCache = newMetadataMetricsCollector()
// MetadataMetricsCollector is a Custom Collector for the metadata cache metrics.
// Its values are computed from the scrape manager's active targets each time
// Collect runs.
type MetadataMetricsCollector struct {
// CacheEntries describes the per-scrape-job number of metadata entries.
CacheEntries *prometheus.Desc
// CacheBytes describes the per-scrape-job size in bytes of the cached metadata.
CacheBytes *prometheus.Desc
// scrapeManager supplies the active targets read by Collect; while it is
// nil, Collect emits nothing.
scrapeManager *Manager
}
// newMetadataMetricsCollector returns a collector holding the descriptors of
// the two metadata cache metrics, each labelled by scrape_job. No scrape
// manager is attached to the returned collector.
func newMetadataMetricsCollector() *MetadataMetricsCollector {
	entriesDesc := prometheus.NewDesc(
		"prometheus_target_metadata_cache_entries",
		"Total number of metric metadata entries in the cache",
		[]string{"scrape_job"},
		nil,
	)
	bytesDesc := prometheus.NewDesc(
		"prometheus_target_metadata_cache_bytes",
		"The number of bytes that are currently used for storing metric metadata in the cache",
		[]string{"scrape_job"},
		nil,
	)

	collector := new(MetadataMetricsCollector)
	collector.CacheEntries = entriesDesc
	collector.CacheBytes = bytesDesc
	return collector
}
// registerManager attaches the scrape manager whose active targets Collect
// reports on, replacing any manager set previously.
func (mc *MetadataMetricsCollector) registerManager(m *Manager) {
mc.scrapeManager = m
}
// Describe writes the descriptors of both metadata cache metrics to ch,
// entries first and bytes second.
func (mc *MetadataMetricsCollector) Describe(ch chan<- *prometheus.Desc) {
	for _, desc := range []*prometheus.Desc{mc.CacheEntries, mc.CacheBytes} {
		ch <- desc
	}
}
// Collect emits the metadata cache metrics for every active target set.
// For each scrape job it sums the metadata length and metadata size over the
// job's targets and sends one gauge for each, labelled with the job name.
// Nothing is emitted while no scrape manager has been registered.
func (mc *MetadataMetricsCollector) Collect(ch chan<- prometheus.Metric) {
	manager := mc.scrapeManager
	if manager == nil {
		return
	}

	for job, active := range manager.TargetsActive() {
		var entries, sizeBytes int
		for _, target := range active {
			sizeBytes += target.MetadataSize()
			entries += target.MetadataLength()
		}

		ch <- prometheus.MustNewConstMetric(mc.CacheEntries, prometheus.GaugeValue, float64(entries), job)
		ch <- prometheus.MustNewConstMetric(mc.CacheBytes, prometheus.GaugeValue, float64(sizeBytes), job)
	}
}
// NewManager is the Manager constructor
func NewManager(o *Options, logger log.Logger, app storage.Appendable) *Manager {
func NewManager(o *Options, logger log.Logger, app storage.Appendable, registerer prometheus.Registerer) (*Manager, error) {
if o == nil {
o = &Options{}
}
if logger == nil {
logger = log.NewNopLogger()
}
sm, err := newScrapeMetrics(registerer)
if err != nil {
return nil, fmt.Errorf("failed to create scrape manager due to error: %w", err)
}
m := &Manager{
append: app,
opts: o,
@ -116,10 +56,12 @@ func NewManager(o *Options, logger log.Logger, app storage.Appendable) *Manager
scrapePools: make(map[string]*scrapePool),
graceShut: make(chan struct{}),
triggerReload: make(chan struct{}, 1),
metrics: sm,
}
targetMetadataCache.registerManager(m)
return m
m.metrics.setTargetMetadataCacheGatherer(m)
return m, nil
}
// Options are the configuration parameters to the scrape manager.
@ -154,6 +96,8 @@ type Manager struct {
targetSets map[string][]*targetgroup.Group
triggerReload chan struct{}
metrics *scrapeMetrics
}
// Run receives and saves target set updates and triggers the scraping loops reloading.
@ -211,8 +155,10 @@ func (m *Manager) reload() {
level.Error(m.logger).Log("msg", "error reloading target set", "err", "invalid config id:"+setName)
continue
}
sp, err := newScrapePool(scrapeConfig, m.append, m.offsetSeed, log.With(m.logger, "scrape_pool", setName), m.opts)
m.metrics.targetScrapePools.Inc()
sp, err := newScrapePool(scrapeConfig, m.append, m.offsetSeed, log.With(m.logger, "scrape_pool", setName), m.opts, m.metrics)
if err != nil {
m.metrics.targetScrapePoolsFailed.Inc()
level.Error(m.logger).Log("msg", "error creating new scrape pool", "err", err, "scrape_pool", setName)
continue
}

View file

@ -20,6 +20,7 @@ import (
"testing"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@ -492,10 +493,13 @@ scrape_configs:
cfg3 = loadConfiguration(t, cfgText3)
ch = make(chan struct{}, 1)
testRegistry = prometheus.NewRegistry()
)
opts := Options{}
scrapeManager := NewManager(&opts, nil, nil)
scrapeManager, err := NewManager(&opts, nil, nil, testRegistry)
require.NoError(t, err)
newLoop := func(scrapeLoopOptions) loop {
ch <- struct{}{}
return noopLoop()
@ -512,6 +516,7 @@ scrape_configs:
logger: nil,
config: cfg1.ScrapeConfigs[0],
client: http.DefaultClient,
metrics: scrapeManager.metrics,
}
scrapeManager.scrapePools = map[string]*scrapePool{
"job1": sp,
@ -560,7 +565,9 @@ scrape_configs:
func TestManagerTargetsUpdates(t *testing.T) {
opts := Options{}
m := NewManager(&opts, nil, nil)
testRegistry := prometheus.NewRegistry()
m, err := NewManager(&opts, nil, nil, testRegistry)
require.NoError(t, err)
ts := make(chan map[string][]*targetgroup.Group)
go m.Run(ts)
@ -613,7 +620,9 @@ global:
}
opts := Options{}
scrapeManager := NewManager(&opts, nil, nil)
testRegistry := prometheus.NewRegistry()
scrapeManager, err := NewManager(&opts, nil, nil, testRegistry)
require.NoError(t, err)
// Load the first config.
cfg1 := getConfig("ha1")
@ -658,8 +667,9 @@ scrape_configs:
- targets: ["foo:9093"]
`
var (
cfg1 = loadConfiguration(t, cfgText1)
cfg2 = loadConfiguration(t, cfgText2)
cfg1 = loadConfiguration(t, cfgText1)
cfg2 = loadConfiguration(t, cfgText2)
testRegistry = prometheus.NewRegistry()
)
reload := func(scrapeManager *Manager, cfg *config.Config) {
@ -695,7 +705,8 @@ scrape_configs:
}
opts := Options{}
scrapeManager := NewManager(&opts, nil, nil)
scrapeManager, err := NewManager(&opts, nil, nil, testRegistry)
require.NoError(t, err)
reload(scrapeManager, cfg1)
require.ElementsMatch(t, []string{"job1", "job2"}, scrapeManager.ScrapePools())

307
scrape/metrics.go Normal file
View file

@ -0,0 +1,307 @@
// Copyright 2016 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scrape
import (
"fmt"
"github.com/prometheus/client_golang/prometheus"
)
// scrapeMetrics groups every metric exposed by the scrape package. One
// instance is built and registered by newScrapeMetrics and then shared by
// pointer with the components named in the section comments below.
type scrapeMetrics struct {
// Used by Manager.
targetMetadataCache *MetadataMetricsCollector
targetScrapePools prometheus.Counter
targetScrapePoolsFailed prometheus.Counter
// Used by scrapePool.
targetReloadIntervalLength *prometheus.SummaryVec
targetScrapePoolReloads prometheus.Counter
targetScrapePoolReloadsFailed prometheus.Counter
targetScrapePoolSyncsCounter *prometheus.CounterVec
targetScrapePoolExceededTargetLimit prometheus.Counter
targetScrapePoolTargetLimit *prometheus.GaugeVec
targetScrapePoolTargetsAdded *prometheus.GaugeVec
targetSyncIntervalLength *prometheus.SummaryVec
targetSyncFailed *prometheus.CounterVec
// Used by targetScraper.
targetScrapeExceededBodySizeLimit prometheus.Counter
// Used by scrapeCache.
targetScrapeCacheFlushForced prometheus.Counter
// Used by scrapeLoop.
targetIntervalLength *prometheus.SummaryVec
targetScrapeSampleLimit prometheus.Counter
targetScrapeSampleDuplicate prometheus.Counter
targetScrapeSampleOutOfOrder prometheus.Counter
targetScrapeSampleOutOfBounds prometheus.Counter
targetScrapeExemplarOutOfOrder prometheus.Counter
targetScrapePoolExceededLabelLimits prometheus.Counter
targetScrapeNativeHistogramBucketLimit prometheus.Counter
}
// newScrapeMetrics builds every metric used by the scrape package and
// registers all of them with reg.
//
// A nil reg is tolerated: the metrics are still created and usable, they are
// just not registered anywhere.
//
// If any registration fails, the collectors this call has already registered
// are unregistered again before the error is returned, so a failed call does
// not leave reg half-populated and a later retry is not rejected because of
// leftovers from this attempt.
//
// The targetMetadataCache collector is returned without a TargetsGatherer;
// the caller must attach one via setTargetMetadataCacheGatherer.
func newScrapeMetrics(reg prometheus.Registerer) (*scrapeMetrics, error) {
	sm := &scrapeMetrics{}

	// Manager metrics.
	sm.targetMetadataCache = &MetadataMetricsCollector{
		CacheEntries: prometheus.NewDesc(
			"prometheus_target_metadata_cache_entries",
			"Total number of metric metadata entries in the cache",
			[]string{"scrape_job"},
			nil,
		),
		CacheBytes: prometheus.NewDesc(
			"prometheus_target_metadata_cache_bytes",
			"The number of bytes that are currently used for storing metric metadata in the cache",
			[]string{"scrape_job"},
			nil,
		),
		// TargetsGatherer should be set later, because it's a circular dependency.
		// newScrapeMetrics() is called by NewManager(), while also TargetsGatherer is the new Manager.
	}

	sm.targetScrapePools = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "prometheus_target_scrape_pools_total",
			Help: "Total number of scrape pool creation attempts.",
		},
	)
	sm.targetScrapePoolsFailed = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "prometheus_target_scrape_pools_failed_total",
			Help: "Total number of scrape pool creations that failed.",
		},
	)

	// Used by scrapePool.
	sm.targetReloadIntervalLength = prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Name:       "prometheus_target_reload_length_seconds",
			Help:       "Actual interval to reload the scrape pool with a given configuration.",
			Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
		},
		[]string{"interval"},
	)
	sm.targetScrapePoolReloads = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "prometheus_target_scrape_pool_reloads_total",
			Help: "Total number of scrape pool reloads.",
		},
	)
	sm.targetScrapePoolReloadsFailed = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "prometheus_target_scrape_pool_reloads_failed_total",
			Help: "Total number of failed scrape pool reloads.",
		},
	)
	sm.targetScrapePoolExceededTargetLimit = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "prometheus_target_scrape_pool_exceeded_target_limit_total",
			Help: "Total number of times scrape pools hit the target limit, during sync or config reload.",
		},
	)
	sm.targetScrapePoolTargetLimit = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "prometheus_target_scrape_pool_target_limit",
			Help: "Maximum number of targets allowed in this scrape pool.",
		},
		[]string{"scrape_job"},
	)
	sm.targetScrapePoolTargetsAdded = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "prometheus_target_scrape_pool_targets",
			Help: "Current number of targets in this scrape pool.",
		},
		[]string{"scrape_job"},
	)
	sm.targetScrapePoolSyncsCounter = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "prometheus_target_scrape_pool_sync_total",
			Help: "Total number of syncs that were executed on a scrape pool.",
		},
		[]string{"scrape_job"},
	)
	sm.targetSyncIntervalLength = prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Name:       "prometheus_target_sync_length_seconds",
			Help:       "Actual interval to sync the scrape pool.",
			Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
		},
		[]string{"scrape_job"},
	)
	sm.targetSyncFailed = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "prometheus_target_sync_failed_total",
			Help: "Total number of target sync failures.",
		},
		[]string{"scrape_job"},
	)

	// Used by targetScraper.
	sm.targetScrapeExceededBodySizeLimit = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "prometheus_target_scrapes_exceeded_body_size_limit_total",
			Help: "Total number of scrapes that hit the body size limit",
		},
	)

	// Used by scrapeCache.
	sm.targetScrapeCacheFlushForced = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "prometheus_target_scrapes_cache_flush_forced_total",
			Help: "How many times a scrape cache was flushed due to getting big while scrapes are failing.",
		},
	)

	// Used by scrapeLoop.
	sm.targetIntervalLength = prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Name:       "prometheus_target_interval_length_seconds",
			Help:       "Actual intervals between scrapes.",
			Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
		},
		[]string{"interval"},
	)
	sm.targetScrapeSampleLimit = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "prometheus_target_scrapes_exceeded_sample_limit_total",
			Help: "Total number of scrapes that hit the sample limit and were rejected.",
		},
	)
	sm.targetScrapeSampleDuplicate = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "prometheus_target_scrapes_sample_duplicate_timestamp_total",
			Help: "Total number of samples rejected due to duplicate timestamps but different values.",
		},
	)
	sm.targetScrapeSampleOutOfOrder = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "prometheus_target_scrapes_sample_out_of_order_total",
			Help: "Total number of samples rejected due to not being out of the expected order.",
		},
	)
	sm.targetScrapeSampleOutOfBounds = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "prometheus_target_scrapes_sample_out_of_bounds_total",
			Help: "Total number of samples rejected due to timestamp falling outside of the time bounds.",
		},
	)
	sm.targetScrapePoolExceededLabelLimits = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "prometheus_target_scrape_pool_exceeded_label_limits_total",
			Help: "Total number of times scrape pools hit the label limits, during sync or config reload.",
		},
	)
	sm.targetScrapeNativeHistogramBucketLimit = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "prometheus_target_scrapes_exceeded_native_histogram_bucket_limit_total",
			Help: "Total number of scrapes that hit the native histogram bucket limit and were rejected.",
		},
	)
	sm.targetScrapeExemplarOutOfOrder = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "prometheus_target_scrapes_exemplar_out_of_order_total",
			Help: "Total number of exemplar rejected due to not being out of the expected order.",
		},
	)

	// Without a registerer there is nothing to register with; hand back the
	// unregistered metrics instead of panicking on a nil interface call.
	if reg == nil {
		return sm, nil
	}

	collectors := []prometheus.Collector{
		// Used by Manager.
		sm.targetMetadataCache,
		sm.targetScrapePools,
		sm.targetScrapePoolsFailed,
		// Used by scrapePool.
		sm.targetReloadIntervalLength,
		sm.targetScrapePoolReloads,
		sm.targetScrapePoolReloadsFailed,
		sm.targetSyncIntervalLength,
		sm.targetScrapePoolSyncsCounter,
		sm.targetScrapePoolExceededTargetLimit,
		sm.targetScrapePoolTargetLimit,
		sm.targetScrapePoolTargetsAdded,
		sm.targetSyncFailed,
		// Used by targetScraper.
		sm.targetScrapeExceededBodySizeLimit,
		// Used by scrapeCache.
		sm.targetScrapeCacheFlushForced,
		// Used by scrapeLoop.
		sm.targetIntervalLength,
		sm.targetScrapeSampleLimit,
		sm.targetScrapeSampleDuplicate,
		sm.targetScrapeSampleOutOfOrder,
		sm.targetScrapeSampleOutOfBounds,
		sm.targetScrapeExemplarOutOfOrder,
		sm.targetScrapePoolExceededLabelLimits,
		sm.targetScrapeNativeHistogramBucketLimit,
	}

	for i, collector := range collectors {
		if err := reg.Register(collector); err != nil {
			// Roll back only what this call registered itself, so the
			// registerer is left as it was found.
			for _, registered := range collectors[:i] {
				reg.Unregister(registered)
			}
			return nil, fmt.Errorf("failed to register scrape metrics: %w", err)
		}
	}
	return sm, nil
}
// setTargetMetadataCacheGatherer attaches the source of active targets to the
// metadata cache collector. Until it is called the collector's TargetsGatherer
// is nil and its Collect method emits nothing.
func (sm *scrapeMetrics) setTargetMetadataCacheGatherer(gatherer TargetsGatherer) {
sm.targetMetadataCache.TargetsGatherer = gatherer
}
// TargetsGatherer is the source of active targets for MetadataMetricsCollector.
type TargetsGatherer interface {
// TargetsActive returns the active targets grouped under a string key;
// MetadataMetricsCollector.Collect uses that key as the scrape_job label value.
TargetsActive() map[string][]*Target
}
// MetadataMetricsCollector is a Custom Collector for the metadata cache metrics.
// Its values are computed from the gatherer's active targets each time
// Collect runs.
type MetadataMetricsCollector struct {
// CacheEntries describes the per-scrape-job number of metadata entries.
CacheEntries *prometheus.Desc
// CacheBytes describes the per-scrape-job size in bytes of the cached metadata.
CacheBytes *prometheus.Desc
// TargetsGatherer supplies the active targets read by Collect; while it is
// nil, Collect emits nothing.
TargetsGatherer TargetsGatherer
}
// Describe writes the descriptors of both metadata cache metrics to ch,
// entries first and bytes second.
func (mc *MetadataMetricsCollector) Describe(ch chan<- *prometheus.Desc) {
	for _, desc := range []*prometheus.Desc{mc.CacheEntries, mc.CacheBytes} {
		ch <- desc
	}
}
// Collect emits the metadata cache metrics for every active target set.
// For each scrape job it sums the metadata length and metadata size over the
// job's targets and sends one gauge for each, labelled with the job name.
// Nothing is emitted while no TargetsGatherer has been set.
func (mc *MetadataMetricsCollector) Collect(ch chan<- prometheus.Metric) {
	gatherer := mc.TargetsGatherer
	if gatherer == nil {
		return
	}

	for job, active := range gatherer.TargetsActive() {
		var entries, sizeBytes int
		for _, target := range active {
			sizeBytes += target.MetadataSize()
			entries += target.MetadataLength()
		}

		ch <- prometheus.MustNewConstMetric(mc.CacheEntries, prometheus.GaugeValue, float64(entries), job)
		ch <- prometheus.MustNewConstMetric(mc.CacheBytes, prometheus.GaugeValue, float64(sizeBytes), job)
	}
}

View file

@ -31,7 +31,6 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
@ -61,172 +60,6 @@ var AlignScrapeTimestamps = true
var errNameLabelMandatory = fmt.Errorf("missing metric name (%s label)", labels.MetricName)
// Package-level scrape metrics. Every collector declared in this block is
// registered with the default Prometheus registry by init() below. The vector
// metrics are labelled either by "interval" or by "scrape_job", as given in
// each declaration.
var (
targetIntervalLength = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Name: "prometheus_target_interval_length_seconds",
Help: "Actual intervals between scrapes.",
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
},
[]string{"interval"},
)
targetReloadIntervalLength = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Name: "prometheus_target_reload_length_seconds",
Help: "Actual interval to reload the scrape pool with a given configuration.",
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
},
[]string{"interval"},
)
targetScrapePools = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrape_pools_total",
Help: "Total number of scrape pool creation attempts.",
},
)
targetScrapePoolsFailed = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrape_pools_failed_total",
Help: "Total number of scrape pool creations that failed.",
},
)
targetScrapePoolReloads = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrape_pool_reloads_total",
Help: "Total number of scrape pool reloads.",
},
)
targetScrapePoolReloadsFailed = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrape_pool_reloads_failed_total",
Help: "Total number of failed scrape pool reloads.",
},
)
targetScrapePoolExceededTargetLimit = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrape_pool_exceeded_target_limit_total",
Help: "Total number of times scrape pools hit the target limit, during sync or config reload.",
},
)
targetScrapePoolTargetLimit = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "prometheus_target_scrape_pool_target_limit",
Help: "Maximum number of targets allowed in this scrape pool.",
},
[]string{"scrape_job"},
)
targetScrapePoolTargetsAdded = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "prometheus_target_scrape_pool_targets",
Help: "Current number of targets in this scrape pool.",
},
[]string{"scrape_job"},
)
targetSyncIntervalLength = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Name: "prometheus_target_sync_length_seconds",
Help: "Actual interval to sync the scrape pool.",
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
},
[]string{"scrape_job"},
)
targetScrapePoolSyncsCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "prometheus_target_scrape_pool_sync_total",
Help: "Total number of syncs that were executed on a scrape pool.",
},
[]string{"scrape_job"},
)
targetScrapeExceededBodySizeLimit = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrapes_exceeded_body_size_limit_total",
Help: "Total number of scrapes that hit the body size limit",
},
)
targetScrapeSampleLimit = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrapes_exceeded_sample_limit_total",
Help: "Total number of scrapes that hit the sample limit and were rejected.",
},
)
targetScrapeSampleDuplicate = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrapes_sample_duplicate_timestamp_total",
Help: "Total number of samples rejected due to duplicate timestamps but different values.",
},
)
targetScrapeSampleOutOfOrder = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrapes_sample_out_of_order_total",
Help: "Total number of samples rejected due to not being out of the expected order.",
},
)
targetScrapeSampleOutOfBounds = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrapes_sample_out_of_bounds_total",
Help: "Total number of samples rejected due to timestamp falling outside of the time bounds.",
},
)
targetScrapeCacheFlushForced = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrapes_cache_flush_forced_total",
Help: "How many times a scrape cache was flushed due to getting big while scrapes are failing.",
},
)
targetScrapeExemplarOutOfOrder = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrapes_exemplar_out_of_order_total",
Help: "Total number of exemplar rejected due to not being out of the expected order.",
},
)
targetScrapePoolExceededLabelLimits = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrape_pool_exceeded_label_limits_total",
Help: "Total number of times scrape pools hit the label limits, during sync or config reload.",
},
)
targetSyncFailed = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "prometheus_target_sync_failed_total",
Help: "Total number of target sync failures.",
},
[]string{"scrape_job"},
)
targetScrapeNativeHistogramBucketLimit = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrapes_exceeded_native_histogram_bucket_limit_total",
Help: "Total number of scrapes that hit the native histogram bucket limit and were rejected.",
},
)
)
// init registers every package-level scrape metric, together with the
// metadata cache collector, with the default Prometheus registry.
// MustRegister panics if any of the registrations fails.
func init() {
	collectors := []prometheus.Collector{
		targetIntervalLength,
		targetReloadIntervalLength,
		targetScrapePools,
		targetScrapePoolsFailed,
		targetScrapePoolReloads,
		targetScrapePoolReloadsFailed,
		targetSyncIntervalLength,
		targetScrapePoolSyncsCounter,
		targetScrapeExceededBodySizeLimit,
		targetScrapeSampleLimit,
		targetScrapeSampleDuplicate,
		targetScrapeSampleOutOfOrder,
		targetScrapeSampleOutOfBounds,
		targetScrapePoolExceededTargetLimit,
		targetScrapePoolTargetLimit,
		targetScrapePoolTargetsAdded,
		targetScrapeCacheFlushForced,
		targetMetadataCache,
		targetScrapeExemplarOutOfOrder,
		targetScrapePoolExceededLabelLimits,
		targetSyncFailed,
		targetScrapeNativeHistogramBucketLimit,
	}
	prometheus.MustRegister(collectors...)
}
// scrapePool manages scrapes for sets of targets.
type scrapePool struct {
appendable storage.Appendable
@ -251,6 +84,8 @@ type scrapePool struct {
newLoop func(scrapeLoopOptions) loop
noDefaultPort bool
metrics *scrapeMetrics
}
type labelLimits struct {
@ -279,15 +114,13 @@ const maxAheadTime = 10 * time.Minute
// returning an empty label set is interpreted as "drop"
type labelsMutator func(labels.Labels) labels.Labels
func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed uint64, logger log.Logger, options *Options) (*scrapePool, error) {
targetScrapePools.Inc()
func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed uint64, logger log.Logger, options *Options, metrics *scrapeMetrics) (*scrapePool, error) {
if logger == nil {
logger = log.NewNopLogger()
}
client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, cfg.JobName, options.HTTPClientOptions...)
if err != nil {
targetScrapePoolsFailed.Inc()
return nil, errors.Wrap(err, "error creating HTTP client")
}
@ -302,6 +135,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed
activeTargets: map[uint64]*Target{},
loops: map[uint64]loop{},
logger: logger,
metrics: metrics,
httpOpts: options.HTTPClientOptions,
noDefaultPort: options.NoDefaultPort,
}
@ -309,7 +143,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed
// Update the targets retrieval function for metadata to a new scrape cache.
cache := opts.cache
if cache == nil {
cache = newScrapeCache()
cache = newScrapeCache(metrics)
}
opts.target.SetMetadataStore(cache)
@ -336,9 +170,10 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed
options.EnableMetadataStorage,
opts.target,
options.PassMetadataInContext,
metrics,
)
}
targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit))
sp.metrics.targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit))
return sp, nil
}
@ -393,11 +228,11 @@ func (sp *scrapePool) stop() {
sp.client.CloseIdleConnections()
if sp.config != nil {
targetScrapePoolSyncsCounter.DeleteLabelValues(sp.config.JobName)
targetScrapePoolTargetLimit.DeleteLabelValues(sp.config.JobName)
targetScrapePoolTargetsAdded.DeleteLabelValues(sp.config.JobName)
targetSyncIntervalLength.DeleteLabelValues(sp.config.JobName)
targetSyncFailed.DeleteLabelValues(sp.config.JobName)
sp.metrics.targetScrapePoolSyncsCounter.DeleteLabelValues(sp.config.JobName)
sp.metrics.targetScrapePoolTargetLimit.DeleteLabelValues(sp.config.JobName)
sp.metrics.targetScrapePoolTargetsAdded.DeleteLabelValues(sp.config.JobName)
sp.metrics.targetSyncIntervalLength.DeleteLabelValues(sp.config.JobName)
sp.metrics.targetSyncFailed.DeleteLabelValues(sp.config.JobName)
}
}
@ -407,12 +242,12 @@ func (sp *scrapePool) stop() {
func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error {
sp.mtx.Lock()
defer sp.mtx.Unlock()
targetScrapePoolReloads.Inc()
sp.metrics.targetScrapePoolReloads.Inc()
start := time.Now()
client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, cfg.JobName, sp.httpOpts...)
if err != nil {
targetScrapePoolReloadsFailed.Inc()
sp.metrics.targetScrapePoolReloadsFailed.Inc()
return errors.Wrap(err, "error creating HTTP client")
}
@ -421,7 +256,7 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error {
oldClient := sp.client
sp.client = client
targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit))
sp.metrics.targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit))
var (
wg sync.WaitGroup
@ -449,7 +284,7 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error {
oldLoop.disableEndOfRunStalenessMarkers()
cache = oc
} else {
cache = newScrapeCache()
cache = newScrapeCache(sp.metrics)
}
t := sp.activeTargets[fp]
@ -496,7 +331,7 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error {
wg.Wait()
oldClient.CloseIdleConnections()
targetReloadIntervalLength.WithLabelValues(interval.String()).Observe(
sp.metrics.targetReloadIntervalLength.WithLabelValues(interval.String()).Observe(
time.Since(start).Seconds(),
)
return nil
@ -520,7 +355,7 @@ func (sp *scrapePool) Sync(tgs []*targetgroup.Group) {
for _, err := range failures {
level.Error(sp.logger).Log("msg", "Creating target failed", "err", err)
}
targetSyncFailed.WithLabelValues(sp.config.JobName).Add(float64(len(failures)))
sp.metrics.targetSyncFailed.WithLabelValues(sp.config.JobName).Add(float64(len(failures)))
for _, t := range targets {
// Replicate .Labels().IsEmpty() with a loop here to avoid generating garbage.
nonEmpty := false
@ -539,10 +374,10 @@ func (sp *scrapePool) Sync(tgs []*targetgroup.Group) {
sp.targetMtx.Unlock()
sp.sync(all)
targetSyncIntervalLength.WithLabelValues(sp.config.JobName).Observe(
sp.metrics.targetSyncIntervalLength.WithLabelValues(sp.config.JobName).Observe(
time.Since(start).Seconds(),
)
targetScrapePoolSyncsCounter.WithLabelValues(sp.config.JobName).Inc()
sp.metrics.targetScrapePoolSyncsCounter.WithLabelValues(sp.config.JobName).Inc()
}
// sync takes a list of potentially duplicated targets, deduplicates them, starts
@ -583,6 +418,7 @@ func (sp *scrapePool) sync(targets []*Target) {
timeout: timeout,
bodySizeLimit: bodySizeLimit,
acceptHeader: acceptHeader(sp.config.ScrapeProtocols),
metrics: sp.metrics,
}
l := sp.newLoop(scrapeLoopOptions{
target: t,
@ -634,7 +470,7 @@ func (sp *scrapePool) sync(targets []*Target) {
sp.targetMtx.Unlock()
targetScrapePoolTargetsAdded.WithLabelValues(sp.config.JobName).Set(float64(len(uniqueLoops)))
sp.metrics.targetScrapePoolTargetsAdded.WithLabelValues(sp.config.JobName).Set(float64(len(uniqueLoops)))
forcedErr := sp.refreshTargetLimitErr()
for _, l := range sp.loops {
l.setForcedError(forcedErr)
@ -658,7 +494,7 @@ func (sp *scrapePool) refreshTargetLimitErr() error {
return nil
}
if l := len(sp.activeTargets); l > int(sp.config.TargetLimit) {
targetScrapePoolExceededTargetLimit.Inc()
sp.metrics.targetScrapePoolExceededTargetLimit.Inc()
return fmt.Errorf("target_limit exceeded (number of targets: %d, limit: %d)", l, sp.config.TargetLimit)
}
return nil
@ -806,6 +642,8 @@ type targetScraper struct {
bodySizeLimit int64
acceptHeader string
metrics *scrapeMetrics
}
var errBodySizeLimit = errors.New("body size limit exceeded")
@ -863,7 +701,7 @@ func (s *targetScraper) readResponse(ctx context.Context, resp *http.Response, w
return "", err
}
if n >= s.bodySizeLimit {
targetScrapeExceededBodySizeLimit.Inc()
s.metrics.targetScrapeExceededBodySizeLimit.Inc()
return "", errBodySizeLimit
}
return resp.Header.Get("Content-Type"), nil
@ -889,7 +727,7 @@ func (s *targetScraper) readResponse(ctx context.Context, resp *http.Response, w
return "", err
}
if n >= s.bodySizeLimit {
targetScrapeExceededBodySizeLimit.Inc()
s.metrics.targetScrapeExceededBodySizeLimit.Inc()
return "", errBodySizeLimit
}
return resp.Header.Get("Content-Type"), nil
@ -942,6 +780,8 @@ type scrapeLoop struct {
reportExtraMetrics bool
appendMetadataToWAL bool
metrics *scrapeMetrics
}
// scrapeCache tracks mappings of exposed metric strings to label sets and
@ -969,6 +809,8 @@ type scrapeCache struct {
metaMtx sync.Mutex
metadata map[string]*metaEntry
metrics *scrapeMetrics
}
// metaEntry holds meta information about a metric.
@ -984,13 +826,14 @@ func (m *metaEntry) size() int {
return len(m.Help) + len(m.Unit) + len(m.Type)
}
func newScrapeCache() *scrapeCache {
func newScrapeCache(metrics *scrapeMetrics) *scrapeCache {
return &scrapeCache{
series: map[string]*cacheEntry{},
droppedSeries: map[string]*uint64{},
seriesCur: map[uint64]labels.Labels{},
seriesPrev: map[uint64]labels.Labels{},
metadata: map[string]*metaEntry{},
metrics: metrics,
}
}
@ -1009,7 +852,7 @@ func (c *scrapeCache) iterDone(flushCache bool) {
// since the last scrape, and allow an additional 1000 in case
// initial scrapes all fail.
flushCache = true
targetScrapeCacheFlushForced.Inc()
c.metrics.targetScrapeCacheFlushForced.Inc()
}
if flushCache {
@ -1213,6 +1056,7 @@ func newScrapeLoop(ctx context.Context,
appendMetadataToWAL bool,
target *Target,
passMetadataInContext bool,
metrics *scrapeMetrics,
) *scrapeLoop {
if l == nil {
l = log.NewNopLogger()
@ -1221,7 +1065,7 @@ func newScrapeLoop(ctx context.Context,
buffers = pool.New(1e3, 1e6, 3, func(sz int) interface{} { return make([]byte, 0, sz) })
}
if cache == nil {
cache = newScrapeCache()
cache = newScrapeCache(metrics)
}
appenderCtx := ctx
@ -1256,6 +1100,7 @@ func newScrapeLoop(ctx context.Context,
scrapeClassicHistograms: scrapeClassicHistograms,
reportExtraMetrics: reportExtraMetrics,
appendMetadataToWAL: appendMetadataToWAL,
metrics: metrics,
}
sl.ctx, sl.cancel = context.WithCancel(ctx)
@ -1335,7 +1180,7 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er
// Only record after the first scrape.
if !last.IsZero() {
targetIntervalLength.WithLabelValues(sl.interval.String()).Observe(
sl.metrics.targetIntervalLength.WithLabelValues(sl.interval.String()).Observe(
time.Since(last).Seconds(),
)
}
@ -1676,7 +1521,7 @@ loop:
// If any label limits is exceeded the scrape should fail.
if err = verifyLabelLimits(lset, sl.labelLimits); err != nil {
targetScrapePoolExceededLabelLimits.Inc()
sl.metrics.targetScrapePoolExceededLabelLimits.Inc()
break loop
}
@ -1741,14 +1586,14 @@ loop:
err = sampleLimitErr
}
// We only want to increment this once per scrape, so this is Inc'd outside the loop.
targetScrapeSampleLimit.Inc()
sl.metrics.targetScrapeSampleLimit.Inc()
}
if bucketLimitErr != nil {
if err == nil {
err = bucketLimitErr // If sample limit is hit, that error takes precedence.
}
// We only want to increment this once per scrape, so this is Inc'd outside the loop.
targetScrapeNativeHistogramBucketLimit.Inc()
sl.metrics.targetScrapeNativeHistogramBucketLimit.Inc()
}
if appErrs.numOutOfOrder > 0 {
level.Warn(sl.l).Log("msg", "Error on ingesting out-of-order samples", "num_dropped", appErrs.numOutOfOrder)
@ -1792,17 +1637,17 @@ func (sl *scrapeLoop) checkAddError(ce *cacheEntry, met []byte, tp *int64, err e
case storage.ErrOutOfOrderSample:
appErrs.numOutOfOrder++
level.Debug(sl.l).Log("msg", "Out of order sample", "series", string(met))
targetScrapeSampleOutOfOrder.Inc()
sl.metrics.targetScrapeSampleOutOfOrder.Inc()
return false, nil
case storage.ErrDuplicateSampleForTimestamp:
appErrs.numDuplicates++
level.Debug(sl.l).Log("msg", "Duplicate sample for timestamp", "series", string(met))
targetScrapeSampleDuplicate.Inc()
sl.metrics.targetScrapeSampleDuplicate.Inc()
return false, nil
case storage.ErrOutOfBounds:
appErrs.numOutOfBounds++
level.Debug(sl.l).Log("msg", "Out of bounds metric", "series", string(met))
targetScrapeSampleOutOfBounds.Inc()
sl.metrics.targetScrapeSampleOutOfBounds.Inc()
return false, nil
case errSampleLimit:
// Keep on parsing output if we hit the limit, so we report the correct
@ -1826,7 +1671,7 @@ func (sl *scrapeLoop) checkAddExemplarError(err error, e exemplar.Exemplar, appE
case storage.ErrOutOfOrderExemplar:
appErrs.numExemplarOutOfOrder++
level.Debug(sl.l).Log("msg", "Out of order exemplar", "exemplar", fmt.Sprintf("%+v", e))
targetScrapeExemplarOutOfOrder.Inc()
sl.metrics.targetScrapeExemplarOutOfOrder.Inc()
return nil
default:
return err

View file

@ -57,11 +57,18 @@ func TestMain(m *testing.M) {
testutil.TolerantVerifyLeak(m)
}
// newTestScrapeMetrics builds a scrapeMetrics value for use in tests.
//
// The metrics are created by newScrapeMetrics against a registry freshly
// created for this call, so every caller gets its own registry instead of
// sharing one. If newScrapeMetrics returns an error the test is failed
// immediately via require.NoError.
func newTestScrapeMetrics(t testing.TB) *scrapeMetrics {
	// Mark this function as a test helper so that a failure below is reported
	// at the calling test's line rather than inside this function.
	t.Helper()

	reg := prometheus.NewRegistry()
	metrics, err := newScrapeMetrics(reg)
	require.NoError(t, err)
	return metrics
}
func TestNewScrapePool(t *testing.T) {
var (
app = &nopAppendable{}
cfg = &config.ScrapeConfig{}
sp, _ = newScrapePool(cfg, app, 0, nil, &Options{})
sp, _ = newScrapePool(cfg, app, 0, nil, &Options{}, newTestScrapeMetrics(t))
)
if a, ok := sp.appendable.(*nopAppendable); !ok || a != app {
@ -97,7 +104,7 @@ func TestDroppedTargetsList(t *testing.T) {
},
},
}
sp, _ = newScrapePool(cfg, app, 0, nil, &Options{})
sp, _ = newScrapePool(cfg, app, 0, nil, &Options{}, newTestScrapeMetrics(t))
expectedLabelSetString = "{__address__=\"127.0.0.1:9090\", __scrape_interval__=\"0s\", __scrape_timeout__=\"0s\", job=\"dropMe\"}"
expectedLength = 2
)
@ -117,7 +124,10 @@ func TestDroppedTargetsList(t *testing.T) {
// TestDiscoveredLabelsUpdate checks that DiscoveredLabels are updated
// even when new labels don't affect the target `hash`.
func TestDiscoveredLabelsUpdate(t *testing.T) {
sp := &scrapePool{}
sp := &scrapePool{
metrics: newTestScrapeMetrics(t),
}
// These are used when syncing so need this to avoid a panic.
sp.config = &config.ScrapeConfig{
ScrapeInterval: model.Duration(1),
@ -184,6 +194,7 @@ func TestScrapePoolStop(t *testing.T) {
loops: map[uint64]loop{},
cancel: func() {},
client: http.DefaultClient,
metrics: newTestScrapeMetrics(t),
}
var mtx sync.Mutex
stopped := map[uint64]bool{}
@ -262,6 +273,7 @@ func TestScrapePoolReload(t *testing.T) {
}
return l
}
sp := &scrapePool{
appendable: &nopAppendable{},
activeTargets: map[uint64]*Target{},
@ -269,6 +281,7 @@ func TestScrapePoolReload(t *testing.T) {
newLoop: newLoop,
logger: nil,
client: http.DefaultClient,
metrics: newTestScrapeMetrics(t),
}
// Reloading a scrape pool with a new scrape configuration must stop all scrape
@ -352,6 +365,7 @@ func TestScrapePoolReloadPreserveRelabeledIntervalTimeout(t *testing.T) {
newLoop: newLoop,
logger: nil,
client: http.DefaultClient,
metrics: newTestScrapeMetrics(t),
}
err := sp.reload(reloadCfg)
@ -381,6 +395,7 @@ func TestScrapePoolTargetLimit(t *testing.T) {
newLoop: newLoop,
logger: log.NewNopLogger(),
client: http.DefaultClient,
metrics: newTestScrapeMetrics(t),
}
tgs := []*targetgroup.Group{}
@ -489,7 +504,7 @@ func TestScrapePoolTargetLimit(t *testing.T) {
func TestScrapePoolAppender(t *testing.T) {
cfg := &config.ScrapeConfig{}
app := &nopAppendable{}
sp, _ := newScrapePool(cfg, app, 0, nil, &Options{})
sp, _ := newScrapePool(cfg, app, 0, nil, &Options{}, newTestScrapeMetrics(t))
loop := sp.newLoop(scrapeLoopOptions{
target: &Target{},
@ -545,7 +560,7 @@ func TestScrapePoolRaces(t *testing.T) {
newConfig := func() *config.ScrapeConfig {
return &config.ScrapeConfig{ScrapeInterval: interval, ScrapeTimeout: timeout}
}
sp, _ := newScrapePool(newConfig(), &nopAppendable{}, 0, nil, &Options{})
sp, _ := newScrapePool(newConfig(), &nopAppendable{}, 0, nil, &Options{}, newTestScrapeMetrics(t))
tgts := []*targetgroup.Group{
{
Targets: []model.LabelSet{
@ -595,6 +610,7 @@ func TestScrapePoolScrapeLoopsStarted(t *testing.T) {
newLoop: newLoop,
logger: nil,
client: http.DefaultClient,
metrics: newTestScrapeMetrics(t),
}
tgs := []*targetgroup.Group{
@ -643,6 +659,7 @@ func TestScrapeLoopStopBeforeRun(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
// The scrape pool synchronizes on stopping scrape loops. However, new scrape
@ -716,6 +733,7 @@ func TestScrapeLoopStop(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
// Terminate loop after 2 scrapes.
@ -793,6 +811,7 @@ func TestScrapeLoopRun(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
// The loop must terminate during the initial offset if the context
@ -849,6 +868,7 @@ func TestScrapeLoopRun(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
go func() {
@ -909,6 +929,7 @@ func TestScrapeLoopForcedErr(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
forcedErr := fmt.Errorf("forced err")
@ -945,7 +966,7 @@ func TestScrapeLoopMetadata(t *testing.T) {
var (
signal = make(chan struct{})
scraper = &testScraper{}
cache = newScrapeCache()
cache = newScrapeCache(newTestScrapeMetrics(t))
)
defer close(signal)
@ -968,6 +989,7 @@ func TestScrapeLoopMetadata(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
defer cancel()
@ -1026,6 +1048,7 @@ func simpleTestScrapeLoop(t testing.TB) (context.Context, *scrapeLoop) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
t.Cleanup(func() { cancel() })
@ -1087,6 +1110,7 @@ func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
slApp := sl.appender(ctx)
@ -1166,6 +1190,7 @@ func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrape(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
// Succeed once, several failures, then stop.
numScrapes := 0
@ -1230,6 +1255,7 @@ func TestScrapeLoopRunCreatesStaleMarkersOnParseFailure(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
// Succeed once, several failures, then stop.
@ -1297,6 +1323,7 @@ func TestScrapeLoopCache(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
numScrapes := 0
@ -1381,6 +1408,7 @@ func TestScrapeLoopCacheMemoryExhaustionProtection(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
numScrapes := 0
@ -1496,6 +1524,7 @@ func TestScrapeLoopAppend(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
now := time.Now()
@ -1583,7 +1612,8 @@ func TestScrapeLoopAppendForConflictingPrefixedLabels(t *testing.T) {
return mutateSampleLabels(l, &Target{labels: labels.FromStrings(tc.targetLabels...)}, false, nil)
},
nil,
func(ctx context.Context) storage.Appender { return app }, nil, 0, true, 0, 0, nil, 0, 0, false, false, false, nil, false,
func(ctx context.Context) storage.Appender { return app },
nil, 0, true, 0, 0, nil, 0, 0, false, false, false, nil, false, newTestScrapeMetrics(t),
)
slApp := sl.appender(context.Background())
_, _, _, err := sl.append(slApp, []byte(tc.exposedLabels), "", time.Date(2000, 1, 1, 1, 0, 0, 0, time.UTC))
@ -1623,6 +1653,7 @@ func TestScrapeLoopAppendCacheEntryButErrNotFound(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
fakeRef := storage.SeriesRef(1)
@ -1682,11 +1713,12 @@ func TestScrapeLoopAppendSampleLimit(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
// Get the value of the Counter before performing the append.
beforeMetric := dto.Metric{}
err := targetScrapeSampleLimit.Write(&beforeMetric)
err := sl.metrics.targetScrapeSampleLimit.Write(&beforeMetric)
require.NoError(t, err)
beforeMetricValue := beforeMetric.GetCounter().GetValue()
@ -1705,7 +1737,7 @@ func TestScrapeLoopAppendSampleLimit(t *testing.T) {
// Check that the Counter has been incremented a single time for the scrape,
// not multiple times for each sample.
metric := dto.Metric{}
err = targetScrapeSampleLimit.Write(&metric)
err = sl.metrics.targetScrapeSampleLimit.Write(&metric)
require.NoError(t, err)
value := metric.GetCounter().GetValue()
@ -1760,10 +1792,11 @@ func TestScrapeLoop_HistogramBucketLimit(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
metric := dto.Metric{}
err := targetScrapeNativeHistogramBucketLimit.Write(&metric)
err := sl.metrics.targetScrapeNativeHistogramBucketLimit.Write(&metric)
require.NoError(t, err)
beforeMetricValue := metric.GetCounter().GetValue()
@ -1801,7 +1834,7 @@ func TestScrapeLoop_HistogramBucketLimit(t *testing.T) {
require.Equal(t, 3, added)
require.Equal(t, 3, seriesAdded)
err = targetScrapeNativeHistogramBucketLimit.Write(&metric)
err = sl.metrics.targetScrapeNativeHistogramBucketLimit.Write(&metric)
require.NoError(t, err)
metricValue := metric.GetCounter().GetValue()
require.Equal(t, beforeMetricValue, metricValue)
@ -1827,7 +1860,7 @@ func TestScrapeLoop_HistogramBucketLimit(t *testing.T) {
require.Equal(t, 3, added)
require.Equal(t, 0, seriesAdded)
err = targetScrapeNativeHistogramBucketLimit.Write(&metric)
err = sl.metrics.targetScrapeNativeHistogramBucketLimit.Write(&metric)
require.NoError(t, err)
metricValue = metric.GetCounter().GetValue()
require.Equal(t, beforeMetricValue+1, metricValue)
@ -1859,6 +1892,7 @@ func TestScrapeLoop_ChangingMetricString(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
now := time.Now()
@ -1908,6 +1942,7 @@ func TestScrapeLoopAppendStaleness(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
now := time.Now()
@ -1960,6 +1995,7 @@ func TestScrapeLoopAppendNoStalenessIfTimestamp(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
now := time.Now()
@ -2286,6 +2322,7 @@ metric: <
false,
nil,
false,
newTestScrapeMetrics(t),
)
now := time.Now()
@ -2374,6 +2411,7 @@ func TestScrapeLoopAppendExemplarSeries(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
now := time.Now()
@ -2427,6 +2465,7 @@ func TestScrapeLoopRunReportsTargetDownOnScrapeError(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error {
@ -2464,6 +2503,7 @@ func TestScrapeLoopRunReportsTargetDownOnInvalidUTF8(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error {
@ -2514,6 +2554,7 @@ func TestScrapeLoopAppendGracefullyIfAmendOrOutOfOrderOrOutOfBounds(t *testing.T
false,
nil,
false,
newTestScrapeMetrics(t),
)
now := time.Unix(1, 0)
@ -2560,6 +2601,7 @@ func TestScrapeLoopOutOfBoundsTimeError(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
now := time.Now().Add(20 * time.Minute)
@ -2755,6 +2797,7 @@ func TestTargetScraperBodySizeLimit(t *testing.T) {
client: http.DefaultClient,
bodySizeLimit: bodySizeLimit,
acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols),
metrics: newTestScrapeMetrics(t),
}
var buf bytes.Buffer
@ -2849,6 +2892,7 @@ func TestScrapeLoop_RespectTimestamps(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
now := time.Now()
@ -2891,6 +2935,7 @@ func TestScrapeLoop_DiscardTimestamps(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
now := time.Now()
@ -2932,6 +2977,7 @@ func TestScrapeLoopDiscardDuplicateLabels(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
defer cancel()
@ -2991,6 +3037,7 @@ func TestScrapeLoopDiscardUnnamedMetrics(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
defer cancel()
@ -3083,7 +3130,7 @@ func TestReuseScrapeCache(t *testing.T) {
ScrapeInterval: model.Duration(5 * time.Second),
MetricsPath: "/metrics",
}
sp, _ = newScrapePool(cfg, app, 0, nil, &Options{})
sp, _ = newScrapePool(cfg, app, 0, nil, &Options{}, newTestScrapeMetrics(t))
t1 = &Target{
discoveredLabels: labels.FromStrings("labelNew", "nameNew", "labelNew1", "nameNew1", "labelNew2", "nameNew2"),
}
@ -3255,6 +3302,7 @@ func TestScrapeAddFast(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
defer cancel()
@ -3275,7 +3323,7 @@ func TestScrapeAddFast(t *testing.T) {
require.NoError(t, slApp.Commit())
}
func TestReuseCacheRace(*testing.T) {
func TestReuseCacheRace(t *testing.T) {
var (
app = &nopAppendable{}
cfg = &config.ScrapeConfig{
@ -3284,7 +3332,7 @@ func TestReuseCacheRace(*testing.T) {
ScrapeInterval: model.Duration(5 * time.Second),
MetricsPath: "/metrics",
}
sp, _ = newScrapePool(cfg, app, 0, nil, &Options{})
sp, _ = newScrapePool(cfg, app, 0, nil, &Options{}, newTestScrapeMetrics(t))
t1 = &Target{
discoveredLabels: labels.FromStrings("labelNew", "nameNew"),
}
@ -3309,7 +3357,7 @@ func TestReuseCacheRace(*testing.T) {
func TestCheckAddError(t *testing.T) {
var appErrs appendErrors
sl := scrapeLoop{l: log.NewNopLogger()}
sl := scrapeLoop{l: log.NewNopLogger(), metrics: newTestScrapeMetrics(t)}
sl.checkAddError(nil, nil, nil, storage.ErrOutOfOrderSample, nil, nil, &appErrs)
require.Equal(t, 1, appErrs.numOutOfOrder)
}
@ -3342,6 +3390,7 @@ func TestScrapeReportSingleAppender(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
numScrapes := 0
@ -3412,7 +3461,7 @@ func TestScrapeReportLimit(t *testing.T) {
}))
defer ts.Close()
sp, err := newScrapePool(cfg, s, 0, nil, &Options{})
sp, err := newScrapePool(cfg, s, 0, nil, &Options{}, newTestScrapeMetrics(t))
require.NoError(t, err)
defer sp.stop()
@ -3545,6 +3594,7 @@ func TestScrapeLoopLabelLimit(t *testing.T) {
false,
nil,
false,
newTestScrapeMetrics(t),
)
slApp := sl.appender(context.Background())
@ -3583,7 +3633,7 @@ func TestTargetScrapeIntervalAndTimeoutRelabel(t *testing.T) {
},
},
}
sp, _ := newScrapePool(config, &nopAppendable{}, 0, nil, &Options{})
sp, _ := newScrapePool(config, &nopAppendable{}, 0, nil, &Options{}, newTestScrapeMetrics(t))
tgts := []*targetgroup.Group{
{
Targets: []model.LabelSet{{model.AddressLabel: "127.0.0.1:9090"}},

View file

@ -298,6 +298,7 @@ type dbMetrics struct {
tombCleanTimer prometheus.Histogram
blocksBytes prometheus.Gauge
maxBytes prometheus.Gauge
retentionDuration prometheus.Gauge
}
func newDBMetrics(db *DB, r prometheus.Registerer) *dbMetrics {
@ -371,6 +372,10 @@ func newDBMetrics(db *DB, r prometheus.Registerer) *dbMetrics {
Name: "prometheus_tsdb_retention_limit_bytes",
Help: "Max number of bytes to be retained in the tsdb blocks, configured 0 means disabled",
})
m.retentionDuration = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "prometheus_tsdb_retention_limit_seconds",
Help: "How long to retain samples in storage.",
})
m.sizeRetentionCount = prometheus.NewCounter(prometheus.CounterOpts{
Name: "prometheus_tsdb_size_retentions_total",
Help: "The number of times that blocks were deleted because the maximum number of bytes was exceeded.",
@ -391,6 +396,7 @@ func newDBMetrics(db *DB, r prometheus.Registerer) *dbMetrics {
m.tombCleanTimer,
m.blocksBytes,
m.maxBytes,
m.retentionDuration,
)
}
return m
@ -936,6 +942,7 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs
maxBytes = 0
}
db.metrics.maxBytes.Set(float64(maxBytes))
db.metrics.retentionDuration.Set((time.Duration(opts.RetentionDuration) * time.Millisecond).Seconds())
if err := db.reload(); err != nil {
return nil, err

View file

@ -1501,6 +1501,19 @@ func TestTimeRetention(t *testing.T) {
require.Equal(t, expBlocks[len(expBlocks)-1].MaxTime, actBlocks[len(actBlocks)-1].meta.MaxTime)
}
// TestRetentionDurationMetric opens a test DB configured with
// RetentionDuration set to 1000 and verifies that the retentionDuration
// gauge exposed through db.metrics reads 1.0.
func TestRetentionDurationMetric(t *testing.T) {
	db := openTestDB(t, &Options{RetentionDuration: 1000}, []int64{100})
	// Closing the DB must succeed; check it when the test returns.
	defer func() {
		require.NoError(t, db.Close())
	}()

	got := prom_testutil.ToFloat64(db.metrics.retentionDuration)
	require.Equal(t, 1.0, got, "metric retention duration mismatch")
}
func TestSizeRetention(t *testing.T) {
opts := DefaultOptions()
opts.OutOfOrderTimeWindow = 100

View file

@ -304,6 +304,10 @@ func (d *Decoder) Samples(rec []byte, samples []RefSample) ([]RefSample, error)
baseRef = dec.Be64()
baseTime = dec.Be64int64()
)
// Allow 1 byte for each varint and 8 for the value; the output slice must be at least that big.
if minSize := dec.Len() / (1 + 1 + 8); cap(samples) < minSize {
samples = make([]RefSample, 0, minSize)
}
for len(dec.B) > 0 && dec.Err() == nil {
dref := dec.Varint64()
dtime := dec.Varint64()

View file

@ -116,6 +116,9 @@ type annoErr struct {
}
// Error implements the error interface.
//
// When a query string is attached, the message is the wrapped error followed
// by the start position of e.PositionRange within that query, in parentheses.
// Without a query there is no position to report, so the wrapped error's
// message is returned unchanged.
func (e annoErr) Error() string {
	if e.Query != "" {
		return fmt.Sprintf("%s (%s)", e.Err, e.PositionRange.StartPosInput(e.Query, 0))
	}
	return e.Err.Error()
}

View file

@ -2,7 +2,7 @@ import * as React from 'react';
import { shallow } from 'enzyme';
import { WALReplayData } from '../types/types';
import { StartingContent } from './withStartingIndicator';
import { Progress } from 'reactstrap';
import { Alert, Progress } from 'reactstrap';
describe('Starting', () => {
describe('progress bar', () => {
@ -52,5 +52,17 @@ describe('Starting', () => {
expect(progress.prop('value')).toBe(21);
expect(progress.prop('color')).toBe('success');
});
// When isUnexpected is true, StartingContent is expected to render an Alert
// whose color prop is 'danger'. The WAL replay status values passed here are
// not asserted on by this test.
it('shows unexpected error', () => {
const status: WALReplayData = {
min: 0,
max: 20,
current: 0,
};
// Shallow-render so only StartingContent's own output is inspected.
const starting = shallow(<StartingContent status={status} isUnexpected={true} />);
const alert = starting.find(Alert);
expect(alert.prop('color')).toBe('danger');
});
});
});

View file

@ -51,7 +51,7 @@ export const withStartingIndicator =
const { ready, walReplayStatus, isUnexpected } = useFetchReadyInterval(pathPrefix);
const staticReady = useReady();
if (staticReady || ready || isUnexpected) {
if (staticReady || ready) {
return <Page {...(rest as T)} />;
}