Mirror of https://github.com/prometheus/prometheus.git (synced 2024-12-24 05:04:05 -08:00)
add rule_group label to rule evaluation metrics (#7094)
Signed-off-by: yeya24 <yb532204897@gmail.com>
This commit is contained in:
parent: 9a21fdcd1b
commit: 00730bfee7
|
@ -54,11 +54,11 @@ const namespace = "prometheus"
|
||||||
// Metrics for rule evaluation.
|
// Metrics for rule evaluation.
|
||||||
type Metrics struct {
|
type Metrics struct {
|
||||||
evalDuration prometheus.Summary
|
evalDuration prometheus.Summary
|
||||||
evalFailures prometheus.Counter
|
|
||||||
evalTotal prometheus.Counter
|
|
||||||
iterationDuration prometheus.Summary
|
iterationDuration prometheus.Summary
|
||||||
iterationsMissed prometheus.Counter
|
iterationsMissed prometheus.Counter
|
||||||
iterationsScheduled prometheus.Counter
|
iterationsScheduled prometheus.Counter
|
||||||
|
evalTotal *prometheus.CounterVec
|
||||||
|
evalFailures *prometheus.CounterVec
|
||||||
groupInterval *prometheus.GaugeVec
|
groupInterval *prometheus.GaugeVec
|
||||||
groupLastEvalTime *prometheus.GaugeVec
|
groupLastEvalTime *prometheus.GaugeVec
|
||||||
groupLastDuration *prometheus.GaugeVec
|
groupLastDuration *prometheus.GaugeVec
|
||||||
|
@ -76,18 +76,6 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics {
|
||||||
Help: "The duration for a rule to execute.",
|
Help: "The duration for a rule to execute.",
|
||||||
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
|
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
|
||||||
}),
|
}),
|
||||||
evalFailures: prometheus.NewCounter(
|
|
||||||
prometheus.CounterOpts{
|
|
||||||
Namespace: namespace,
|
|
||||||
Name: "rule_evaluation_failures_total",
|
|
||||||
Help: "The total number of rule evaluation failures.",
|
|
||||||
}),
|
|
||||||
evalTotal: prometheus.NewCounter(
|
|
||||||
prometheus.CounterOpts{
|
|
||||||
Namespace: namespace,
|
|
||||||
Name: "rule_evaluations_total",
|
|
||||||
Help: "The total number of rule evaluations.",
|
|
||||||
}),
|
|
||||||
iterationDuration: prometheus.NewSummary(prometheus.SummaryOpts{
|
iterationDuration: prometheus.NewSummary(prometheus.SummaryOpts{
|
||||||
Namespace: namespace,
|
Namespace: namespace,
|
||||||
Name: "rule_group_duration_seconds",
|
Name: "rule_group_duration_seconds",
|
||||||
|
@ -104,6 +92,22 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics {
|
||||||
Name: "rule_group_iterations_total",
|
Name: "rule_group_iterations_total",
|
||||||
Help: "The total number of scheduled rule group evaluations, whether executed or missed.",
|
Help: "The total number of scheduled rule group evaluations, whether executed or missed.",
|
||||||
}),
|
}),
|
||||||
|
evalTotal: prometheus.NewCounterVec(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Namespace: namespace,
|
||||||
|
Name: "rule_evaluations_total",
|
||||||
|
Help: "The total number of rule evaluations.",
|
||||||
|
},
|
||||||
|
[]string{"rule_group"},
|
||||||
|
),
|
||||||
|
evalFailures: prometheus.NewCounterVec(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Namespace: namespace,
|
||||||
|
Name: "rule_evaluation_failures_total",
|
||||||
|
Help: "The total number of rule evaluation failures.",
|
||||||
|
},
|
||||||
|
[]string{"rule_group"},
|
||||||
|
),
|
||||||
groupInterval: prometheus.NewGaugeVec(
|
groupInterval: prometheus.NewGaugeVec(
|
||||||
prometheus.GaugeOpts{
|
prometheus.GaugeOpts{
|
||||||
Namespace: namespace,
|
Namespace: namespace,
|
||||||
|
@ -141,11 +145,11 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics {
|
||||||
if reg != nil {
|
if reg != nil {
|
||||||
reg.MustRegister(
|
reg.MustRegister(
|
||||||
m.evalDuration,
|
m.evalDuration,
|
||||||
m.evalFailures,
|
|
||||||
m.evalTotal,
|
|
||||||
m.iterationDuration,
|
m.iterationDuration,
|
||||||
m.iterationsMissed,
|
m.iterationsMissed,
|
||||||
m.iterationsScheduled,
|
m.iterationsScheduled,
|
||||||
|
m.evalTotal,
|
||||||
|
m.evalFailures,
|
||||||
m.groupInterval,
|
m.groupInterval,
|
||||||
m.groupLastEvalTime,
|
m.groupLastEvalTime,
|
||||||
m.groupLastDuration,
|
m.groupLastDuration,
|
||||||
|
@ -257,10 +261,13 @@ func NewGroup(o GroupOptions) *Group {
|
||||||
metrics = NewGroupMetrics(o.Opts.Registerer)
|
metrics = NewGroupMetrics(o.Opts.Registerer)
|
||||||
}
|
}
|
||||||
|
|
||||||
metrics.groupLastEvalTime.WithLabelValues(groupKey(o.File, o.Name))
|
key := groupKey(o.File, o.Name)
|
||||||
metrics.groupLastDuration.WithLabelValues(groupKey(o.File, o.Name))
|
metrics.evalTotal.WithLabelValues(key)
|
||||||
metrics.groupRules.WithLabelValues(groupKey(o.File, o.Name)).Set(float64(len(o.Rules)))
|
metrics.evalFailures.WithLabelValues(key)
|
||||||
metrics.groupInterval.WithLabelValues(groupKey(o.File, o.Name)).Set(o.Interval.Seconds())
|
metrics.groupLastEvalTime.WithLabelValues(key)
|
||||||
|
metrics.groupLastDuration.WithLabelValues(key)
|
||||||
|
metrics.groupRules.WithLabelValues(key).Set(float64(len(o.Rules)))
|
||||||
|
metrics.groupInterval.WithLabelValues(key).Set(o.Interval.Seconds())
|
||||||
|
|
||||||
return &Group{
|
return &Group{
|
||||||
name: o.Name,
|
name: o.Name,
|
||||||
|
@ -567,7 +574,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
|
||||||
rule.SetEvaluationTimestamp(t)
|
rule.SetEvaluationTimestamp(t)
|
||||||
}(time.Now())
|
}(time.Now())
|
||||||
|
|
||||||
g.metrics.evalTotal.Inc()
|
g.metrics.evalTotal.WithLabelValues(groupKey(g.File(), g.Name())).Inc()
|
||||||
|
|
||||||
vector, err := rule.Eval(ctx, ts, g.opts.QueryFunc, g.opts.ExternalURL)
|
vector, err := rule.Eval(ctx, ts, g.opts.QueryFunc, g.opts.ExternalURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -576,7 +583,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
|
||||||
if _, ok := err.(promql.ErrQueryCanceled); !ok {
|
if _, ok := err.(promql.ErrQueryCanceled); !ok {
|
||||||
level.Warn(g.logger).Log("msg", "Evaluating rule failed", "rule", rule, "err", err)
|
level.Warn(g.logger).Log("msg", "Evaluating rule failed", "rule", rule, "err", err)
|
||||||
}
|
}
|
||||||
g.metrics.evalFailures.Inc()
|
g.metrics.evalFailures.WithLabelValues(groupKey(g.File(), g.Name())).Inc()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -945,6 +952,8 @@ func (m *Manager) Update(interval time.Duration, files []string, externalLabels
|
||||||
go func(n string, g *Group) {
|
go func(n string, g *Group) {
|
||||||
g.stopAndMakeStale()
|
g.stopAndMakeStale()
|
||||||
if m := g.metrics; m != nil {
|
if m := g.metrics; m != nil {
|
||||||
|
m.evalTotal.DeleteLabelValues(n)
|
||||||
|
m.evalFailures.DeleteLabelValues(n)
|
||||||
m.groupInterval.DeleteLabelValues(n)
|
m.groupInterval.DeleteLabelValues(n)
|
||||||
m.groupLastEvalTime.DeleteLabelValues(n)
|
m.groupLastEvalTime.DeleteLabelValues(n)
|
||||||
m.groupLastDuration.DeleteLabelValues(n)
|
m.groupLastDuration.DeleteLabelValues(n)
|
||||||
|
|
|
@ -901,6 +901,8 @@ func TestNotify(t *testing.T) {
|
||||||
func TestMetricsUpdate(t *testing.T) {
|
func TestMetricsUpdate(t *testing.T) {
|
||||||
files := []string{"fixtures/rules.yaml", "fixtures/rules2.yaml"}
|
files := []string{"fixtures/rules.yaml", "fixtures/rules2.yaml"}
|
||||||
metricNames := []string{
|
metricNames := []string{
|
||||||
|
"prometheus_rule_evaluations_total",
|
||||||
|
"prometheus_rule_evaluation_failures_total",
|
||||||
"prometheus_rule_group_interval_seconds",
|
"prometheus_rule_group_interval_seconds",
|
||||||
"prometheus_rule_group_last_duration_seconds",
|
"prometheus_rule_group_last_duration_seconds",
|
||||||
"prometheus_rule_group_last_evaluation_timestamp_seconds",
|
"prometheus_rule_group_last_evaluation_timestamp_seconds",
|
||||||
|
@ -950,11 +952,11 @@ func TestMetricsUpdate(t *testing.T) {
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
files: files,
|
files: files,
|
||||||
metrics: 8,
|
metrics: 12,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
files: files[:1],
|
files: files[:1],
|
||||||
metrics: 4,
|
metrics: 6,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
files: files[:0],
|
files: files[:0],
|
||||||
|
@ -962,7 +964,7 @@ func TestMetricsUpdate(t *testing.T) {
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
files: files[1:],
|
files: files[1:],
|
||||||
metrics: 4,
|
metrics: 6,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue