From 5ab24a06d0870a45364180dbe4d946ef33bed9c4 Mon Sep 17 00:00:00 2001 From: komisan19 <18901496+komisan19@users.noreply.github.com> Date: Sun, 21 Apr 2024 23:31:50 +0900 Subject: [PATCH 01/44] refactor: add max func to maxTimestamp Signed-off-by: komisan19 <18901496+komisan19@users.noreply.github.com> --- .../remote/otlptranslator/prometheusremotewrite/helper.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go index 817cbaba7..85019e272 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -442,10 +442,8 @@ func mostRecentTimestampInMetric(metric pmetric.Metric) pcommon.Timestamp { } func maxTimestamp(a, b pcommon.Timestamp) pcommon.Timestamp { - if a > b { - return a - } - return b + resultTimestamp := max(a, b) + return resultTimestamp } // addSingleSummaryDataPoint converts pt to len(QuantileValues) + 2 samples. From 3d84d4d6dcfb610bf0866718f13e8a3b689bcca0 Mon Sep 17 00:00:00 2001 From: komisan19 <18901496+komisan19@users.noreply.github.com> Date: Mon, 22 Apr 2024 19:03:19 +0900 Subject: [PATCH 02/44] fix Signed-off-by: komisan19 <18901496+komisan19@users.noreply.github.com> --- storage/remote/otlptranslator/prometheusremotewrite/helper.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go index 85019e272..da4ca07d8 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -442,8 +442,7 @@ func mostRecentTimestampInMetric(metric pmetric.Metric) pcommon.Timestamp { } func maxTimestamp(a, b pcommon.Timestamp) pcommon.Timestamp { - resultTimestamp := max(a, b) - return resultTimestamp + return max(a, b) } // addSingleSummaryDataPoint converts pt to len(QuantileValues) + 2 samples. From e7219e3d366bc912381577e7eb5ec256fc13f275 Mon Sep 17 00:00:00 2001 From: gotjosh Date: Tue, 23 Apr 2024 09:54:21 +0100 Subject: [PATCH 03/44] Rule Manager: Add `rule_group_last_restore_duration_seconds` to measure restore time per rule group When a rule group changes or prometheus is restarted we need to ensure we restore the active alerts that were firing for a corresponding rule, for that Prometheus uses the `ALERTS_FOR_STATE` series to query the previous state and restore it. If a given rule has high cardinality (think 100s of 1000s for series) this proccess can take a bit of time - this is the first of a series of PRs to improve this problem and I'd like to start with exposing the time it takes to restore a rule group as a gauge. Signed-off-by: gotjosh --- rules/group.go | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/rules/group.go b/rules/group.go index c268d2df7..aafab5544 100644 --- a/rules/group.go +++ b/rules/group.go @@ -230,7 +230,9 @@ func (g *Group) run(ctx context.Context) { g.evalIterationFunc(ctx, g, evalTimestamp) } - g.RestoreForState(time.Now()) + now := time.Now() + g.RestoreForState(now) + g.metrics.GroupLastRestoreDuration.WithLabelValues(GroupKey(g.file, g.name)).Set(time.Since(now).Seconds()) g.shouldRestore = false } @@ -779,17 +781,18 @@ const namespace = "prometheus" // Metrics for rule evaluation. type Metrics struct { - EvalDuration prometheus.Summary - IterationDuration prometheus.Summary - IterationsMissed *prometheus.CounterVec - IterationsScheduled *prometheus.CounterVec - EvalTotal *prometheus.CounterVec - EvalFailures *prometheus.CounterVec - GroupInterval *prometheus.GaugeVec - GroupLastEvalTime *prometheus.GaugeVec - GroupLastDuration *prometheus.GaugeVec - GroupRules *prometheus.GaugeVec - GroupSamples *prometheus.GaugeVec + EvalDuration prometheus.Summary + IterationDuration prometheus.Summary + IterationsMissed *prometheus.CounterVec + IterationsScheduled *prometheus.CounterVec + EvalTotal *prometheus.CounterVec + EvalFailures *prometheus.CounterVec + GroupInterval *prometheus.GaugeVec + GroupLastEvalTime *prometheus.GaugeVec + GroupLastDuration *prometheus.GaugeVec + GroupLastRestoreDuration *prometheus.GaugeVec + GroupRules *prometheus.GaugeVec + GroupSamples *prometheus.GaugeVec } // NewGroupMetrics creates a new instance of Metrics and registers it with the provided registerer, @@ -865,6 +868,14 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics { }, []string{"rule_group"}, ), + GroupLastRestoreDuration: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Name: "rule_group_last_restore_duration_seconds", + Help: "The duration of the last rule group restoration.", + }, + []string{"rule_group"}, + ), GroupRules: prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: namespace, @@ -894,6 +905,7 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics { m.GroupInterval, m.GroupLastEvalTime, m.GroupLastDuration, + m.GroupLastRestoreDuration, m.GroupRules, m.GroupSamples, ) From 5e638b7f44ee5dfc81b3eea9b0c2189cc504a475 Mon Sep 17 00:00:00 2001 From: tesla59 Date: Wed, 24 Apr 2024 02:56:15 +0530 Subject: [PATCH 04/44] docs: storage.md: clarify storage.tsdb.retention.time description Signed-off-by: tesla59 --- docs/storage.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/storage.md b/docs/storage.md index b4c5b6ada..aae16a170 100644 --- a/docs/storage.md +++ b/docs/storage.md @@ -84,8 +84,10 @@ or 31 days, whichever is smaller. Prometheus has several flags that configure local storage. The most important are: - `--storage.tsdb.path`: Where Prometheus writes its database. Defaults to `data/`. -- `--storage.tsdb.retention.time`: When to remove old data. Defaults to `15d`. - Overrides `storage.tsdb.retention` if this flag is set to anything other than default. +- `--storage.tsdb.retention.time`: How long to retain samples in storage. When this flag is + set it overrides `storage.tsdb.retention`. If neither this flag nor `storage.tsdb.retention` + nor `storage.tsdb.retention.size` is set, the retention time defaults to `15d`. + Units Supported: y, w, d, h, m, s, ms. - `--storage.tsdb.retention.size`: The maximum number of bytes of storage blocks to retain. The oldest data will be removed first. Defaults to `0` or disabled. Units supported: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Only From 381a77ac1e1ef5e616ad45630dc6700a1916ba1d Mon Sep 17 00:00:00 2001 From: gotjosh Date: Wed, 24 Apr 2024 14:21:11 +0100 Subject: [PATCH 05/44] Change variable name to `restoreStartTime` from `now` and introduce a log line to record total time Signed-off-by: gotjosh --- rules/group.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/rules/group.go b/rules/group.go index aafab5544..27be4b1f4 100644 --- a/rules/group.go +++ b/rules/group.go @@ -230,9 +230,11 @@ func (g *Group) run(ctx context.Context) { g.evalIterationFunc(ctx, g, evalTimestamp) } - now := time.Now() - g.RestoreForState(now) - g.metrics.GroupLastRestoreDuration.WithLabelValues(GroupKey(g.file, g.name)).Set(time.Since(now).Seconds()) + restoreStartTime := time.Now() + g.RestoreForState(restoreStartTime) + totalRestoreTimeSeconds := time.Since(restoreStartTime).Seconds() + g.metrics.GroupLastRestoreDuration.WithLabelValues(GroupKey(g.file, g.name)).Set(totalRestoreTimeSeconds) + level.Debug(g.logger).Log("msg", "'for' state restoration completed", "duration_seconds", totalRestoreTimeSeconds) g.shouldRestore = false } From d672eda97949aabc57d151705f4cdbc847256626 Mon Sep 17 00:00:00 2001 From: gotjosh Date: Wed, 24 Apr 2024 14:31:18 +0100 Subject: [PATCH 06/44] Add a changelog entry Signed-off-by: gotjosh --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0afd8d702..23d2c89da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## unreleased * [CHANGE] TSDB: Fix the predicate checking for blocks which are beyond the retention period to include the ones right at the retention boundary. #9633 +* [ENHANCEMENT] Rules: Add `rule_group_last_restore_duration_seconds` to measure the time it takes to restore a rule group. #13974 ## 2.51.2 / 2024-04-09 From 5beb2fe0051fb0ea04e32ab0e0b8bdac86d3ae75 Mon Sep 17 00:00:00 2001 From: gotjosh Date: Wed, 24 Apr 2024 15:24:35 +0100 Subject: [PATCH 07/44] Improve the metric description Signed-off-by: gotjosh --- rules/group.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/group.go b/rules/group.go index 27be4b1f4..987136a00 100644 --- a/rules/group.go +++ b/rules/group.go @@ -874,7 +874,7 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics { prometheus.GaugeOpts{ Namespace: namespace, Name: "rule_group_last_restore_duration_seconds", - Help: "The duration of the last rule group restoration.", + Help: "The duration of the last alert rules alerts restoration using the `ALERTS_FOR_STATE` series.", }, []string{"rule_group"}, ), From d15869af32f918b9a958c3a73144f4834c935a75 Mon Sep 17 00:00:00 2001 From: Alan Protasio Date: Wed, 24 Apr 2024 07:41:33 -0700 Subject: [PATCH 08/44] Avoid creating new slices for labels values on postings for matchers (#13958) * Avoid creating new slices for labels values on postings for matchers Signed-off-by: alanprot * refactor Signed-off-by: alanprot --------- Signed-off-by: alanprot --- tsdb/querier.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tsdb/querier.go b/tsdb/querier.go index 8ebedfe52..a6763e996 100644 --- a/tsdb/querier.go +++ b/tsdb/querier.go @@ -331,7 +331,7 @@ func postingsForMatcher(ctx context.Context, ix IndexReader, m *labels.Matcher) return nil, err } - var res []string + res := vals[:0] for _, val := range vals { if m.Matches(val) { res = append(res, val) @@ -368,7 +368,7 @@ func inversePostingsForMatcher(ctx context.Context, ix IndexReader, m *labels.Ma return nil, err } - var res []string + res := vals[:0] // If the inverse match is ="", we just want all the values. if m.Type == labels.MatchEqual && m.Value == "" { res = vals From 4daaa59c081fc63bcdf3d73e77babeb6f5494a53 Mon Sep 17 00:00:00 2001 From: gotjosh Date: Tue, 23 Apr 2024 19:40:10 +0100 Subject: [PATCH 09/44] Rule Manager: Only query once per alert rule when restoring alert state Prometheus restores alert state between restarts and updates. For each rule, it looks at the alerts that are meant to be active and then queries the `ALERTS_FOR_STATE` series for _each_ alert within the rules. If the alert rule has 120 instances (or series) it'll execute the same query with slightly different labels. This PR changes the approach so that we only query once per alert rule and then match the corresponding alert that we're about to restore against the series-set. While the approach might use a bit more memory at start-up (if even?) the restore proccess is only ran once per restart so I'd consider this a big win. This builds on top of #13974 Signed-off-by: gotjosh --- rules/alerting.go | 32 +++++++++++++------------------- rules/group.go | 33 +++++++++++++++++++++++---------- 2 files changed, 36 insertions(+), 29 deletions(-) diff --git a/rules/alerting.go b/rules/alerting.go index 50c67fa2d..1bcf0a034 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -246,13 +246,16 @@ func (r *AlertingRule) sample(alert *Alert, ts time.Time) promql.Sample { return s } -// forStateSample returns the sample for ALERTS_FOR_STATE. +// forStateSample returns a promql.Sample with the rule labels, `ALERTS_FOR_STATE` as the metric name and the rule name as the `alertname` label. +// Optionally, if an alert is provided it'll copy the labels of the alert into the sample labels. func (r *AlertingRule) forStateSample(alert *Alert, ts time.Time, v float64) promql.Sample { lb := labels.NewBuilder(r.labels) - alert.Labels.Range(func(l labels.Label) { - lb.Set(l.Name, l.Value) - }) + if alert != nil { + alert.Labels.Range(func(l labels.Label) { + lb.Set(l.Name, l.Value) + }) + } lb.Set(labels.MetricName, alertForStateMetricName) lb.Set(labels.AlertName, r.name) @@ -265,9 +268,11 @@ func (r *AlertingRule) forStateSample(alert *Alert, ts time.Time, v float64) pro return s } -// QueryforStateSeries returns the series for ALERTS_FOR_STATE. -func (r *AlertingRule) QueryforStateSeries(ctx context.Context, alert *Alert, q storage.Querier) (storage.Series, error) { - smpl := r.forStateSample(alert, time.Now(), 0) +// QueryforStateSeries returns the series for ALERTS_FOR_STATE of the alert rule. +func (r *AlertingRule) QueryforStateSeries(ctx context.Context, q storage.Querier) (storage.SeriesSet, error) { + // We use a sample to ease the building of matchers. + // Don't provide an alert as we want matchers that match all series for the alert rule. + smpl := r.forStateSample(nil, time.Now(), 0) var matchers []*labels.Matcher smpl.Metric.Range(func(l labels.Label) { mt, err := labels.NewMatcher(labels.MatchEqual, l.Name, l.Value) @@ -278,18 +283,7 @@ func (r *AlertingRule) QueryforStateSeries(ctx context.Context, alert *Alert, q }) sset := q.Select(ctx, false, nil, matchers...) - var s storage.Series - for sset.Next() { - // Query assures that smpl.Metric is included in sset.At().Labels(), - // hence just checking the length would act like equality. - // (This is faster than calling labels.Compare again as we already have some info). - if sset.At().Labels().Len() == len(matchers) { - s = sset.At() - break - } - } - - return s, sset.Err() + return sset, sset.Err() } // SetEvaluationDuration updates evaluationDuration to the duration it took to evaluate the rule on its last evaluation. diff --git a/rules/group.go b/rules/group.go index 987136a00..81f7b1df2 100644 --- a/rules/group.go +++ b/rules/group.go @@ -664,19 +664,32 @@ func (g *Group) RestoreForState(ts time.Time) { continue } + sset, err := alertRule.QueryforStateSeries(g.opts.Context, q) + if err != nil { + level.Error(g.logger).Log( + "msg", "Failed to restore 'for' state", + labels.AlertName, alertRule.Name(), + "stage", "Select", + "err", err, + ) + continue + } + + // No results for this alert rule. + if err == nil { + level.Debug(g.logger).Log("msg", "Failed to find a series to restore the 'for' state", labels.AlertName, alertRule.Name()) + continue + } + alertRule.ForEachActiveAlert(func(a *Alert) { var s storage.Series - s, err := alertRule.QueryforStateSeries(g.opts.Context, a, q) - if err != nil { - // Querier Warnings are ignored. We do not care unless we have an error. - level.Error(g.logger).Log( - "msg", "Failed to restore 'for' state", - labels.AlertName, alertRule.Name(), - "stage", "Select", - "err", err, - ) - return + // Find the series for the given alert from the set. + for sset.Next() { + if sset.At().Labels().Hash() == a.Labels.Hash() { + s = sset.At() + break + } } if s == nil { From e6dcbd2e26b73c6e4e4e0b8bc89c3d33f7de05c3 Mon Sep 17 00:00:00 2001 From: gotjosh Date: Tue, 23 Apr 2024 19:49:07 +0100 Subject: [PATCH 10/44] bug: nil check against the series set not errors Signed-off-by: gotjosh --- rules/alerting.go | 2 +- rules/group.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rules/alerting.go b/rules/alerting.go index 1bcf0a034..2cadd3ac5 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -281,8 +281,8 @@ func (r *AlertingRule) QueryforStateSeries(ctx context.Context, q storage.Querie } matchers = append(matchers, mt) }) - sset := q.Select(ctx, false, nil, matchers...) + sset := q.Select(ctx, false, nil, matchers...) return sset, sset.Err() } diff --git a/rules/group.go b/rules/group.go index 81f7b1df2..7afeaf96e 100644 --- a/rules/group.go +++ b/rules/group.go @@ -676,7 +676,7 @@ func (g *Group) RestoreForState(ts time.Time) { } // No results for this alert rule. - if err == nil { + if sset == nil { level.Debug(g.logger).Log("msg", "Failed to find a series to restore the 'for' state", labels.AlertName, alertRule.Name()) continue } From 276201598c18b0ddb62defb9badebadc000c4e4a Mon Sep 17 00:00:00 2001 From: gotjosh Date: Wed, 24 Apr 2024 12:27:35 +0100 Subject: [PATCH 11/44] Fix tests and a bug with the series lookup logic. Signed-off-by: gotjosh --- rules/alerting_test.go | 26 ++++--- rules/group.go | 17 ++--- rules/manager_test.go | 162 +++++++++++++++++++++-------------------- 3 files changed, 105 insertions(+), 100 deletions(-) diff --git a/rules/alerting_test.go b/rules/alerting_test.go index ddfe345ef..366297b4a 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -710,19 +710,21 @@ func TestQueryForStateSeries(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil, ) - alert := &Alert{ - State: 0, - Labels: labels.EmptyLabels(), - Annotations: labels.EmptyLabels(), - Value: 0, - ActiveAt: time.Time{}, - FiredAt: time.Time{}, - ResolvedAt: time.Time{}, - LastSentAt: time.Time{}, - ValidUntil: time.Time{}, - } + sample := rule.forStateSample(nil, time.Time{}, 0) + var matchersCount int + sample.Metric.Range(func(l labels.Label) { + matchersCount++ + }) - series, err := rule.QueryforStateSeries(context.Background(), alert, querier) + seriesSet, err := rule.QueryforStateSeries(context.Background(), querier) + + var series storage.Series + for seriesSet.Next() { + if seriesSet.At().Labels().Len() == matchersCount { + series = seriesSet.At() + break + } + } require.Equal(t, tst.expectedSeries, series) require.Equal(t, tst.expectedError, err) diff --git a/rules/group.go b/rules/group.go index 7afeaf96e..411ca68e2 100644 --- a/rules/group.go +++ b/rules/group.go @@ -681,21 +681,18 @@ func (g *Group) RestoreForState(ts time.Time) { continue } + result := map[uint64]storage.Series{} + for sset.Next() { + result[sset.At().Labels().DropMetricName().Hash()] = sset.At() + } + alertRule.ForEachActiveAlert(func(a *Alert) { var s storage.Series - // Find the series for the given alert from the set. - for sset.Next() { - if sset.At().Labels().Hash() == a.Labels.Hash() { - s = sset.At() - break - } - } - - if s == nil { + s, ok := result[a.Labels.Hash()] + if !ok { return } - // Series found for the 'for' state. var t int64 var v float64 diff --git a/rules/manager_test.go b/rules/manager_test.go index 50ab6b861..a3bd335d1 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -407,6 +407,7 @@ func TestForStateRestore(t *testing.T) { // Prometheus goes down here. We create new rules and groups. type testInput struct { + name string restoreDuration time.Duration alerts []*Alert @@ -414,105 +415,110 @@ func TestForStateRestore(t *testing.T) { noRestore bool gracePeriod bool downDuration time.Duration + before func() } tests := []testInput{ { - // Normal restore (alerts were not firing). + name: "normal restore (alerts were not firing)", restoreDuration: 15 * time.Minute, alerts: rule.ActiveAlerts(), downDuration: 10 * time.Minute, }, { - // Testing Outage Tolerance. + name: "outage tolerance", restoreDuration: 40 * time.Minute, noRestore: true, num: 2, }, { - // No active alerts. + name: "no active alerts", restoreDuration: 50 * time.Minute, alerts: []*Alert{}, }, + { + name: "test the grace period", + restoreDuration: 25 * time.Minute, + alerts: []*Alert{}, + gracePeriod: true, + before: func() { + for _, duration := range []time.Duration{10 * time.Minute, 15 * time.Minute, 20 * time.Minute} { + evalTime := baseTime.Add(duration) + group.Eval(context.TODO(), evalTime) + } + }, + num: 2, + }, } - testFunc := func(tst testInput) { - newRule := NewAlertingRule( - "HTTPRequestRateLow", - expr, - alertForDuration, - 0, - labels.FromStrings("severity", "critical"), - labels.EmptyLabels(), labels.EmptyLabels(), "", false, nil, - ) - newGroup := NewGroup(GroupOptions{ - Name: "default", - Interval: time.Second, - Rules: []Rule{newRule}, - ShouldRestore: true, - Opts: opts, + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.before != nil { + tt.before() + } + + newRule := NewAlertingRule( + "HTTPRequestRateLow", + expr, + alertForDuration, + 0, + labels.FromStrings("severity", "critical"), + labels.EmptyLabels(), labels.EmptyLabels(), "", false, nil, + ) + newGroup := NewGroup(GroupOptions{ + Name: "default", + Interval: time.Second, + Rules: []Rule{newRule}, + ShouldRestore: true, + Opts: opts, + }) + + newGroups := make(map[string]*Group) + newGroups["default;"] = newGroup + + restoreTime := baseTime.Add(tt.restoreDuration) + // First eval before restoration. + newGroup.Eval(context.TODO(), restoreTime) + // Restore happens here. + newGroup.RestoreForState(restoreTime) + + got := newRule.ActiveAlerts() + for _, aa := range got { + require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) + } + sort.Slice(got, func(i, j int) bool { + return labels.Compare(got[i].Labels, got[j].Labels) < 0 + }) + + // Checking if we have restored it correctly. + switch { + case tt.noRestore: + require.Len(t, got, tt.num) + for _, e := range got { + require.Equal(t, e.ActiveAt, restoreTime) + } + case tt.gracePeriod: + + require.Len(t, got, tt.num) + for _, e := range got { + require.Equal(t, opts.ForGracePeriod, e.ActiveAt.Add(alertForDuration).Sub(restoreTime)) + } + default: + exp := tt.alerts + require.Equal(t, len(exp), len(got)) + sortAlerts(exp) + sortAlerts(got) + for i, e := range exp { + require.Equal(t, e.Labels, got[i].Labels) + + // Difference in time should be within 1e6 ns, i.e. 1ms + // (due to conversion between ns & ms, float64 & int64). + activeAtDiff := float64(e.ActiveAt.Unix() + int64(tt.downDuration/time.Second) - got[i].ActiveAt.Unix()) + require.Equal(t, 0.0, math.Abs(activeAtDiff), "'for' state restored time is wrong") + } + } }) - - newGroups := make(map[string]*Group) - newGroups["default;"] = newGroup - - restoreTime := baseTime.Add(tst.restoreDuration) - // First eval before restoration. - newGroup.Eval(context.TODO(), restoreTime) - // Restore happens here. - newGroup.RestoreForState(restoreTime) - - got := newRule.ActiveAlerts() - for _, aa := range got { - require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) - } - sort.Slice(got, func(i, j int) bool { - return labels.Compare(got[i].Labels, got[j].Labels) < 0 - }) - - // Checking if we have restored it correctly. - switch { - case tst.noRestore: - require.Len(t, got, tst.num) - for _, e := range got { - require.Equal(t, e.ActiveAt, restoreTime) - } - case tst.gracePeriod: - require.Len(t, got, tst.num) - for _, e := range got { - require.Equal(t, opts.ForGracePeriod, e.ActiveAt.Add(alertForDuration).Sub(restoreTime)) - } - default: - exp := tst.alerts - require.Equal(t, len(exp), len(got)) - sortAlerts(exp) - sortAlerts(got) - for i, e := range exp { - require.Equal(t, e.Labels, got[i].Labels) - - // Difference in time should be within 1e6 ns, i.e. 1ms - // (due to conversion between ns & ms, float64 & int64). - activeAtDiff := float64(e.ActiveAt.Unix() + int64(tst.downDuration/time.Second) - got[i].ActiveAt.Unix()) - require.Equal(t, 0.0, math.Abs(activeAtDiff), "'for' state restored time is wrong") - } - } } - - for _, tst := range tests { - testFunc(tst) - } - - // Testing the grace period. - for _, duration := range []time.Duration{10 * time.Minute, 15 * time.Minute, 20 * time.Minute} { - evalTime := baseTime.Add(duration) - group.Eval(context.TODO(), evalTime) - } - testFunc(testInput{ - restoreDuration: 25 * time.Minute, - alerts: []*Alert{}, - gracePeriod: true, - num: 2, - }) } func TestStaleness(t *testing.T) { From fa75985c1c4695b077cefdbb9e6fd9ec73ee8c0e Mon Sep 17 00:00:00 2001 From: gotjosh Date: Wed, 24 Apr 2024 14:04:16 +0100 Subject: [PATCH 12/44] Use the string representation of the labels instead of the hash Signed-off-by: gotjosh --- rules/group.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rules/group.go b/rules/group.go index 411ca68e2..4acce6e60 100644 --- a/rules/group.go +++ b/rules/group.go @@ -681,15 +681,15 @@ func (g *Group) RestoreForState(ts time.Time) { continue } - result := map[uint64]storage.Series{} + result := map[string]storage.Series{} for sset.Next() { - result[sset.At().Labels().DropMetricName().Hash()] = sset.At() + result[sset.At().Labels().DropMetricName().String()] = sset.At() } alertRule.ForEachActiveAlert(func(a *Alert) { var s storage.Series - s, ok := result[a.Labels.Hash()] + s, ok := result[a.Labels.String()] if !ok { return } From 6cfc58430829895dbf10e749da9bddd8ae8c25f4 Mon Sep 17 00:00:00 2001 From: gotjosh Date: Wed, 24 Apr 2024 19:02:47 +0100 Subject: [PATCH 13/44] - Add a changelog entry - Improve variable name of the map produced by the series set Signed-off-by: gotjosh --- CHANGELOG.md | 1 + rules/group.go | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 23d2c89da..58293351c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## unreleased * [CHANGE] TSDB: Fix the predicate checking for blocks which are beyond the retention period to include the ones right at the retention boundary. #9633 +* [CHANGE] Rules: Execute 1 query instead of N (where N is the number of alerts within alert rule) when restoring alerts. #13980 * [ENHANCEMENT] Rules: Add `rule_group_last_restore_duration_seconds` to measure the time it takes to restore a rule group. #13974 ## 2.51.2 / 2024-04-09 diff --git a/rules/group.go b/rules/group.go index 4acce6e60..c4331a465 100644 --- a/rules/group.go +++ b/rules/group.go @@ -681,15 +681,15 @@ func (g *Group) RestoreForState(ts time.Time) { continue } - result := map[string]storage.Series{} + seriesByLabels := map[string]storage.Series{} for sset.Next() { - result[sset.At().Labels().DropMetricName().String()] = sset.At() + seriesByLabels[sset.At().Labels().DropMetricName().String()] = sset.At() } alertRule.ForEachActiveAlert(func(a *Alert) { var s storage.Series - s, ok := result[a.Labels.String()] + s, ok := seriesByLabels[a.Labels.String()] if !ok { return } From 2de2fee0354eaab35e8dbca8388dd6d3d2158220 Mon Sep 17 00:00:00 2001 From: gotjosh Date: Wed, 24 Apr 2024 19:10:34 +0100 Subject: [PATCH 14/44] Allow the result map for the series set before hand with a hint. Signed-off-by: gotjosh --- rules/alerting.go | 7 +++++++ rules/alerting_test.go | 19 +++++++++++++++++++ rules/group.go | 3 ++- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/rules/alerting.go b/rules/alerting.go index 2cadd3ac5..e357cc615 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -542,6 +542,13 @@ func (r *AlertingRule) ForEachActiveAlert(f func(*Alert)) { } } +func (r *AlertingRule) ActiveAlertsCount() int { + r.activeMtx.Lock() + defer r.activeMtx.Unlock() + + return len(r.active) +} + func (r *AlertingRule) sendAlerts(ctx context.Context, ts time.Time, resendDelay, interval time.Duration, notifyFunc NotifyFunc) { alerts := []*Alert{} r.ForEachActiveAlert(func(alert *Alert) { diff --git a/rules/alerting_test.go b/rules/alerting_test.go index 366297b4a..78f6d6715 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -1027,3 +1027,22 @@ func TestAlertingRule_SetNoDependencyRules(t *testing.T) { rule.SetNoDependencyRules(true) require.True(t, rule.NoDependencyRules()) } + +func TestAlertingRule_ActiveAlertsCount(t *testing.T) { + rule := NewAlertingRule( + "TestRule", + nil, + time.Minute, + 0, + labels.FromStrings("severity", "critical"), + labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil, + ) + + // Set an active alert. + lbls := labels.FromStrings("a1", "1") + h := lbls.Hash() + al := &Alert{State: StateFiring, Labels: lbls, ActiveAt: time.Now()} + rule.active[h] = al + + require.Equal(t, 1, t rule.ActiveAlertsCount()) +} diff --git a/rules/group.go b/rules/group.go index c4331a465..28a0ff6e1 100644 --- a/rules/group.go +++ b/rules/group.go @@ -681,7 +681,8 @@ func (g *Group) RestoreForState(ts time.Time) { continue } - seriesByLabels := map[string]storage.Series{} + // While not technically the same number of series we expect, it's as good of an approximation as any. + seriesByLabels := make(map[string]storage.Series, alertRule.ActiveAlertsCount()) for sset.Next() { seriesByLabels[sset.At().Labels().DropMetricName().String()] = sset.At() } From cc2207148edf9c348837b2631e4adf6d85a8819b Mon Sep 17 00:00:00 2001 From: gotjosh Date: Wed, 24 Apr 2024 19:20:57 +0100 Subject: [PATCH 15/44] fix typo Signed-off-by: gotjosh --- rules/alerting_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/alerting_test.go b/rules/alerting_test.go index 78f6d6715..a3f97b2a4 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -1044,5 +1044,5 @@ func TestAlertingRule_ActiveAlertsCount(t *testing.T) { al := &Alert{State: StateFiring, Labels: lbls, ActiveAt: time.Now()} rule.active[h] = al - require.Equal(t, 1, t rule.ActiveAlertsCount()) + require.Equal(t, 1, rule.ActiveAlertsCount()) } From 7aacef9b42a275f7ad8f60a707abb6de2a5d1448 Mon Sep 17 00:00:00 2001 From: Arthur Silva Sens Date: Wed, 24 Apr 2024 10:53:54 -0300 Subject: [PATCH 16/44] bugfix: Decouple native histogram ingestions and protobuf parsing Up until this point, if a scrape was done with the protobuf format Prometheus would always try to ingest native histograms even with the feature flag disabled. This causes problems with other feature-flags that depend on the protobuf format, like 'created-timestamp-zero-ingestion'. This commit decouples native histogram parsing from ingestion, making sure ingestion only happens when the 'native-histogram' feature-flag is enabled. Signed-off-by: Arthur Silva Sens --- cmd/prometheus/main.go | 1 + scrape/manager.go | 2 ++ scrape/scrape.go | 64 +++++++++++++++++++++++------------------- scrape/scrape_test.go | 28 ++++++++++++------ 4 files changed, 57 insertions(+), 38 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 0e15d5ca5..8218ffb18 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -217,6 +217,7 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error { level.Info(logger).Log("msg", "Experimental PromQL functions enabled.") case "native-histograms": c.tsdb.EnableNativeHistograms = true + c.scrape.EnableNativeHistogramsIngestion = true // Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers. config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols diff --git a/scrape/manager.go b/scrape/manager.go index a7a8b828e..cb92db5a8 100644 --- a/scrape/manager.go +++ b/scrape/manager.go @@ -81,6 +81,8 @@ type Options struct { // Option to enable the ingestion of the created timestamp as a synthetic zero sample. // See: https://github.com/prometheus/proposals/blob/main/proposals/2023-06-13_created-timestamp.md EnableCreatedTimestampZeroIngestion bool + // Option to enable the ingestion of native histograms. + EnableNativeHistogramsIngestion bool // Optional HTTP client options to use when scraping. HTTPClientOptions []config_util.HTTPClientOption diff --git a/scrape/scrape.go b/scrape/scrape.go index 4bbeab57a..c285f05e3 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -178,6 +178,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed opts.interval, opts.timeout, opts.scrapeClassicHistograms, + options.EnableNativeHistogramsIngestion, options.EnableCreatedTimestampZeroIngestion, options.ExtraMetrics, options.EnableMetadataStorage, @@ -827,7 +828,10 @@ type scrapeLoop struct { interval time.Duration timeout time.Duration scrapeClassicHistograms bool - enableCTZeroIngestion bool + + // Feature flagged options. + enableNativeHistogramIngestion bool + enableCTZeroIngestion bool appender func(ctx context.Context) storage.Appender symbolTable *labels.SymbolTable @@ -1123,6 +1127,7 @@ func newScrapeLoop(ctx context.Context, interval time.Duration, timeout time.Duration, scrapeClassicHistograms bool, + enableNativeHistogramIngestion bool, enableCTZeroIngestion bool, reportExtraMetrics bool, appendMetadataToWAL bool, @@ -1153,33 +1158,34 @@ func newScrapeLoop(ctx context.Context, } sl := &scrapeLoop{ - scraper: sc, - buffers: buffers, - cache: cache, - appender: appender, - symbolTable: symbolTable, - sampleMutator: sampleMutator, - reportSampleMutator: reportSampleMutator, - stopped: make(chan struct{}), - offsetSeed: offsetSeed, - l: l, - parentCtx: ctx, - appenderCtx: appenderCtx, - honorTimestamps: honorTimestamps, - trackTimestampsStaleness: trackTimestampsStaleness, - enableCompression: enableCompression, - sampleLimit: sampleLimit, - bucketLimit: bucketLimit, - maxSchema: maxSchema, - labelLimits: labelLimits, - interval: interval, - timeout: timeout, - scrapeClassicHistograms: scrapeClassicHistograms, - enableCTZeroIngestion: enableCTZeroIngestion, - reportExtraMetrics: reportExtraMetrics, - appendMetadataToWAL: appendMetadataToWAL, - metrics: metrics, - skipOffsetting: skipOffsetting, + scraper: sc, + buffers: buffers, + cache: cache, + appender: appender, + symbolTable: symbolTable, + sampleMutator: sampleMutator, + reportSampleMutator: reportSampleMutator, + stopped: make(chan struct{}), + offsetSeed: offsetSeed, + l: l, + parentCtx: ctx, + appenderCtx: appenderCtx, + honorTimestamps: honorTimestamps, + trackTimestampsStaleness: trackTimestampsStaleness, + enableCompression: enableCompression, + sampleLimit: sampleLimit, + bucketLimit: bucketLimit, + maxSchema: maxSchema, + labelLimits: labelLimits, + interval: interval, + timeout: timeout, + scrapeClassicHistograms: scrapeClassicHistograms, + enableNativeHistogramIngestion: enableNativeHistogramIngestion, + enableCTZeroIngestion: enableCTZeroIngestion, + reportExtraMetrics: reportExtraMetrics, + appendMetadataToWAL: appendMetadataToWAL, + metrics: metrics, + skipOffsetting: skipOffsetting, } sl.ctx, sl.cancel = context.WithCancel(ctx) @@ -1627,7 +1633,7 @@ loop: } } - if isHistogram { + if isHistogram && sl.enableNativeHistogramIngestion { if h != nil { ref, err = app.AppendHistogram(ref, lset, t, h, nil) } else { diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 20b21936b..51bd377e4 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -678,6 +678,7 @@ func newBasicScrapeLoop(t testing.TB, ctx context.Context, scraper scraper, app false, false, false, + false, nil, false, newTestScrapeMetrics(t), @@ -819,6 +820,7 @@ func TestScrapeLoopRun(t *testing.T) { false, false, false, + false, nil, false, scrapeMetrics, @@ -962,6 +964,7 @@ func TestScrapeLoopMetadata(t *testing.T) { false, false, false, + false, nil, false, scrapeMetrics, @@ -1571,6 +1574,7 @@ func TestScrapeLoop_HistogramBucketLimit(t *testing.T) { app := &bucketLimitAppender{Appender: resApp, limit: 2} sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0) + sl.enableNativeHistogramIngestion = true sl.sampleMutator = func(l labels.Labels) labels.Labels { if l.Has("deleteme") { return labels.EmptyLabels() @@ -1797,14 +1801,15 @@ func TestScrapeLoopAppendStalenessIfTrackTimestampStaleness(t *testing.T) { func TestScrapeLoopAppendExemplar(t *testing.T) { tests := []struct { - title string - scrapeClassicHistograms bool - scrapeText string - contentType string - discoveryLabels []string - floats []floatSample - histograms []histogramSample - exemplars []exemplar.Exemplar + title string + scrapeClassicHistograms bool + enableNativeHistogramsIngestion bool + scrapeText string + contentType string + discoveryLabels []string + floats []floatSample + histograms []histogramSample + exemplars []exemplar.Exemplar }{ { title: "Metric without exemplars", @@ -1862,6 +1867,8 @@ metric_total{n="2"} 2 # {t="2"} 2.0 20000 }, { title: "Native histogram with three exemplars", + + enableNativeHistogramsIngestion: true, scrapeText: `name: "test_histogram" help: "Test histogram with many buckets removed to keep it manageable in size." type: HISTOGRAM @@ -1976,6 +1983,8 @@ metric: < }, { title: "Native histogram with three exemplars scraped as classic histogram", + + enableNativeHistogramsIngestion: true, scrapeText: `name: "test_histogram" help: "Test histogram with many buckets removed to keep it manageable in size." type: HISTOGRAM @@ -2115,6 +2124,7 @@ metric: < } sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0) + sl.enableNativeHistogramIngestion = test.enableNativeHistogramsIngestion sl.sampleMutator = func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, discoveryLabels, false, nil) } @@ -3710,7 +3720,7 @@ scrape_configs: s.DB.EnableNativeHistograms() reg := prometheus.NewRegistry() - mng, err := NewManager(nil, nil, s, reg) + mng, err := NewManager(&Options{EnableNativeHistogramsIngestion: true}, nil, s, reg) require.NoError(t, err) cfg, err := config.Load(configStr, false, log.NewNopLogger()) require.NoError(t, err) From dde2e5eb73baac260f41e4b808043ba2aa309810 Mon Sep 17 00:00:00 2001 From: George Robinson Date: Thu, 25 Apr 2024 13:18:50 +0100 Subject: [PATCH 17/44] Improve comments around resending resolved alerts (#13990) Signed-off-by: George Robinson --- rules/alerting.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/rules/alerting.go b/rules/alerting.go index 50c67fa2d..edcdfe5e0 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -457,8 +457,17 @@ func (r *AlertingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc, } } - // If the alert was previously firing, keep it around for a given - // retention time so it is reported as resolved to the AlertManager. + // If the alert is resolved (was firing but is now inactive) keep it for + // at least the retention period. This is important for a number of reasons: + // + // 1. It allows for Prometheus to be more resilient to network issues that + // would otherwise prevent a resolved alert from being reported as resolved + // to Alertmanager. + // + // 2. It helps reduce the chance of resolved notifications being lost if + // Alertmanager crashes or restarts between receiving the resolved alert + // from Prometheus and sending the resolved notification. This tends to + // occur for routes with large Group intervals. if a.State == StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > resolvedRetention) { delete(r.active, fp) } From 31a4217784a1836a81d498e69e1d9ec2a82cf185 Mon Sep 17 00:00:00 2001 From: Stephen Heckler Date: Thu, 25 Apr 2024 12:33:29 -0500 Subject: [PATCH 18/44] discovery(k8s): Only register client-go metrics adapters when needed Previously the metrics adapters for client-go were registered in an init function. This resulted in clobbering default metrics providers when these packages are imported into an application that leverages the default client-go metrics registry. Instead, let's only register these adapters when requested. Signed-off-by: Stephen Heckler --- discovery/metrics.go | 10 ---------- discovery/metrics_k8s_client.go | 8 ++++++++ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/discovery/metrics.go b/discovery/metrics.go index e738331a1..356be1ddc 100644 --- a/discovery/metrics.go +++ b/discovery/metrics.go @@ -19,16 +19,6 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -var ( - clientGoRequestMetrics = &clientGoRequestMetricAdapter{} - clientGoWorkloadMetrics = &clientGoWorkqueueMetricsProvider{} -) - -func init() { - clientGoRequestMetrics.RegisterWithK8sGoClient() - clientGoWorkloadMetrics.RegisterWithK8sGoClient() -} - // Metrics to be used with a discovery manager. type Metrics struct { FailedConfigs prometheus.Gauge diff --git a/discovery/metrics_k8s_client.go b/discovery/metrics_k8s_client.go index f16245684..c13ce5331 100644 --- a/discovery/metrics_k8s_client.go +++ b/discovery/metrics_k8s_client.go @@ -35,6 +35,11 @@ const ( workqueueMetricsNamespace = KubernetesMetricsNamespace + "_workqueue" ) +var ( + clientGoRequestMetrics = &clientGoRequestMetricAdapter{} + clientGoWorkloadMetrics = &clientGoWorkqueueMetricsProvider{} +) + var ( // Metrics for client-go's HTTP requests. clientGoRequestResultMetricVec = prometheus.NewCounterVec( @@ -135,6 +140,9 @@ func clientGoMetrics() []prometheus.Collector { } func RegisterK8sClientMetricsWithPrometheus(registerer prometheus.Registerer) error { + clientGoRequestMetrics.RegisterWithK8sGoClient() + clientGoWorkloadMetrics.RegisterWithK8sGoClient() + for _, collector := range clientGoMetrics() { err := registerer.Register(collector) if err != nil { From 801314901c91faf9945637680eadf283485f1516 Mon Sep 17 00:00:00 2001 From: Nishant Singh Date: Sat, 27 Apr 2024 13:50:41 +0530 Subject: [PATCH 19/44] Update docs/storage.md Co-authored-by: Ayoub Mrini Signed-off-by: Nishant Singh --- docs/storage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/storage.md b/docs/storage.md index aae16a170..21532119f 100644 --- a/docs/storage.md +++ b/docs/storage.md @@ -85,7 +85,7 @@ Prometheus has several flags that configure local storage. The most important ar - `--storage.tsdb.path`: Where Prometheus writes its database. Defaults to `data/`. - `--storage.tsdb.retention.time`: How long to retain samples in storage. When this flag is - set it overrides `storage.tsdb.retention`. If neither this flag nor `storage.tsdb.retention` + set, it overrides `storage.tsdb.retention`. If neither this flag nor `storage.tsdb.retention` nor `storage.tsdb.retention.size` is set, the retention time defaults to `15d`. Units Supported: y, w, d, h, m, s, ms. - `--storage.tsdb.retention.size`: The maximum number of bytes of storage blocks to retain. From c8b23980c9f5b21ada8f9b7e26ea0e2220f7e587 Mon Sep 17 00:00:00 2001 From: Nishant Singh Date: Sat, 27 Apr 2024 13:50:50 +0530 Subject: [PATCH 20/44] Update docs/storage.md Co-authored-by: Ayoub Mrini Signed-off-by: Nishant Singh --- docs/storage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/storage.md b/docs/storage.md index 21532119f..46bb7210e 100644 --- a/docs/storage.md +++ b/docs/storage.md @@ -87,7 +87,7 @@ Prometheus has several flags that configure local storage. The most important ar - `--storage.tsdb.retention.time`: How long to retain samples in storage. When this flag is set, it overrides `storage.tsdb.retention`. If neither this flag nor `storage.tsdb.retention` nor `storage.tsdb.retention.size` is set, the retention time defaults to `15d`. - Units Supported: y, w, d, h, m, s, ms. + Supported units: y, w, d, h, m, s, ms. - `--storage.tsdb.retention.size`: The maximum number of bytes of storage blocks to retain. The oldest data will be removed first. Defaults to `0` or disabled. Units supported: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Only From f7e923c3bb47071e9d810e31889e7cb3da133862 Mon Sep 17 00:00:00 2001 From: Heyoxe <32708033+Heyoxe@users.noreply.github.com> Date: Sat, 27 Apr 2024 16:01:30 +0200 Subject: [PATCH 21/44] fix(scaleway-sd): use public IPs if no private IP present (#13941) * fix(scaleway-sd): use public IPs if no private IP present * tests(scaleway-sd): add instance with routed public ip and no private ip --------- Signed-off-by: Heyoxe <32708033+Heyoxe@users.noreply.github.com> --- discovery/scaleway/instance.go | 9 +- discovery/scaleway/instance_test.go | 24 +++- discovery/scaleway/testdata/instance.json | 142 +++++++++++++++++++++- docs/configuration/configuration.md | 3 +- 4 files changed, 173 insertions(+), 5 deletions(-) diff --git a/discovery/scaleway/instance.go b/discovery/scaleway/instance.go index 9dd786c80..6540f06dc 100644 --- a/discovery/scaleway/instance.go +++ b/discovery/scaleway/instance.go @@ -174,20 +174,25 @@ func (d *instanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, labels[instanceTagsLabel] = model.LabelValue(tags) } + addr := "" if server.IPv6 != nil { labels[instancePublicIPv6Label] = model.LabelValue(server.IPv6.Address.String()) + addr = server.IPv6.Address.String() } if server.PublicIP != nil { labels[instancePublicIPv4Label] = model.LabelValue(server.PublicIP.Address.String()) + addr = server.PublicIP.Address.String() } if server.PrivateIP != nil { labels[instancePrivateIPv4Label] = model.LabelValue(*server.PrivateIP) + addr = *server.PrivateIP + } - addr := net.JoinHostPort(*server.PrivateIP, strconv.FormatUint(uint64(d.port), 10)) + if addr != "" { + addr := net.JoinHostPort(addr, strconv.FormatUint(uint64(d.port), 10)) labels[model.AddressLabel] = model.LabelValue(addr) - targets = append(targets, labels) } } diff --git a/discovery/scaleway/instance_test.go b/discovery/scaleway/instance_test.go index d2449d00c..ae70a9ed2 100644 --- a/discovery/scaleway/instance_test.go +++ b/discovery/scaleway/instance_test.go @@ -60,7 +60,7 @@ api_url: %s tg := tgs[0] require.NotNil(t, tg) require.NotNil(t, tg.Targets) - require.Len(t, tg.Targets, 2) + require.Len(t, tg.Targets, 3) for i, lbls := range []model.LabelSet{ { @@ -110,6 +110,28 @@ api_url: %s "__meta_scaleway_instance_type": "DEV1-S", "__meta_scaleway_instance_zone": "fr-par-1", }, + { + "__address__": "51.158.183.115:80", + "__meta_scaleway_instance_boot_type": "local", + "__meta_scaleway_instance_hostname": "routed-dualstack", + "__meta_scaleway_instance_id": "4904366a-7e26-4b65-b97b-6392c761247a", + "__meta_scaleway_instance_image_arch": "x86_64", + "__meta_scaleway_instance_image_id": "3e0a5b84-1d69-4993-8fa4-0d7df52d5160", + "__meta_scaleway_instance_image_name": "Ubuntu 22.04 Jammy Jellyfish", + "__meta_scaleway_instance_location_cluster_id": "19", + "__meta_scaleway_instance_location_hypervisor_id": "1201", + "__meta_scaleway_instance_location_node_id": "24", + "__meta_scaleway_instance_name": "routed-dualstack", + "__meta_scaleway_instance_organization_id": "20b3d507-96ac-454c-a795-bc731b46b12f", + "__meta_scaleway_instance_project_id": "20b3d507-96ac-454c-a795-bc731b46b12f", + "__meta_scaleway_instance_public_ipv4": "51.158.183.115", + "__meta_scaleway_instance_region": "nl-ams", + "__meta_scaleway_instance_security_group_id": "984414da-9fc2-49c0-a925-fed6266fe092", + "__meta_scaleway_instance_security_group_name": "Default security group", + "__meta_scaleway_instance_status": "running", + "__meta_scaleway_instance_type": "DEV1-S", + "__meta_scaleway_instance_zone": "nl-ams-1", + }, } { t.Run(fmt.Sprintf("item %d", i), func(t *testing.T) { require.Equal(t, lbls, tg.Targets[i]) diff --git a/discovery/scaleway/testdata/instance.json b/discovery/scaleway/testdata/instance.json index f8d35b215..b433f7598 100644 --- a/discovery/scaleway/testdata/instance.json +++ b/discovery/scaleway/testdata/instance.json @@ -216,6 +216,146 @@ "placement_group": null, "private_nics": [], "zone": "fr-par-1" + }, + { + "id": "4904366a-7e26-4b65-b97b-6392c761247a", + "name": "routed-dualstack", + "arch": "x86_64", + "commercial_type": "DEV1-S", + "boot_type": "local", + "organization": "20b3d507-96ac-454c-a795-bc731b46b12f", + "project": "20b3d507-96ac-454c-a795-bc731b46b12f", + "hostname": "routed-dualstack", + "image": { + "id": "3e0a5b84-1d69-4993-8fa4-0d7df52d5160", + "name": "Ubuntu 22.04 Jammy Jellyfish", + "organization": "51b656e3-4865-41e8-adbc-0c45bdd780db", + "project": "51b656e3-4865-41e8-adbc-0c45bdd780db", + "root_volume": { + "id": "13d945b9-5e78-4f9d-8ac4-c4bc2fa7c31a", + "name": "Ubuntu 22.04 Jammy Jellyfish", + "volume_type": "unified", + "size": 10000000000 + }, + "extra_volumes": {}, + "public": true, + "arch": "x86_64", + "creation_date": "2024-02-22T15:52:56.037007+00:00", + "modification_date": "2024-02-22T15:52:56.037007+00:00", + "default_bootscript": null, + "from_server": null, + "state": "available", + "tags": [], + "zone": "nl-ams-1" + }, + "volumes": { + "0": { + "boot": false, + "id": "fe85c817-e67e-4e24-8f13-bde3e9f42620", + "name": "Ubuntu 22.04 Jammy Jellyfish", + "volume_type": "l_ssd", + "export_uri": null, + "organization": "20b3d507-96ac-454c-a795-bc731b46b12f", + "project": "20b3d507-96ac-454c-a795-bc731b46b12f", + "server": { + "id": "4904366a-7e26-4b65-b97b-6392c761247a", + "name": "routed-dualstack" + }, + "size": 20000000000, + "state": "available", + "creation_date": "2024-04-19T14:50:14.019739+00:00", + "modification_date": "2024-04-19T14:50:14.019739+00:00", + "tags": [], + "zone": "nl-ams-1" + } + }, + "tags": [], + "state": "running", + "protected": false, + "state_detail": "booted", + "public_ip": { + "id": "53f8f861-7a11-4b16-a4bc-fb8f4b4a11d0", + "address": "51.158.183.115", + "dynamic": false, + "gateway": "62.210.0.1", + "netmask": "32", + "family": "inet", + "provisioning_mode": "dhcp", + "tags": [], + "state": "attached", + "ipam_id": "ec3499ff-a664-49b7-818a-9fe4b95aee5e" + }, + "public_ips": [ + { + "id": "53f8f861-7a11-4b16-a4bc-fb8f4b4a11d0", + "address": "51.158.183.115", + "dynamic": false, + "gateway": "62.210.0.1", + "netmask": "32", + "family": "inet", + "provisioning_mode": "dhcp", + "tags": [], + "state": "attached", + "ipam_id": "ec3499ff-a664-49b7-818a-9fe4b95aee5e" + }, + { + "id": "f52a8c81-0875-4aee-b96e-eccfc6bec367", + "address": "2001:bc8:1640:1568:dc00:ff:fe21:91b", + "dynamic": false, + "gateway": "fe80::dc00:ff:fe21:91c", + "netmask": "64", + "family": "inet6", + "provisioning_mode": "slaac", + "tags": [], + "state": "attached", + "ipam_id": "40d1e6ea-e932-42f9-8acb-55398bec7ad6" + } + ], + "mac_address": "de:00:00:21:09:1b", + "routed_ip_enabled": true, + "ipv6": null, + "extra_networks": [], + "dynamic_ip_required": false, + "enable_ipv6": false, + "private_ip": null, + "creation_date": "2024-04-19T14:50:14.019739+00:00", + "modification_date": "2024-04-19T14:52:21.181670+00:00", + "bootscript": { + "id": "5a520dda-96d6-4ed2-acd1-1d526b6859fe", + "public": true, + "title": "x86_64 mainline 4.4.182 rev1", + "architecture": "x86_64", + "organization": "11111111-1111-4111-8111-111111111111", + "project": "11111111-1111-4111-8111-111111111111", + "kernel": "http://10.196.2.9/kernel/x86_64-mainline-lts-4.4-4.4.182-rev1/vmlinuz-4.4.182", + "dtb": "", + "initrd": "http://10.196.2.9/initrd/initrd-Linux-x86_64-v3.14.6.gz", + "bootcmdargs": "LINUX_COMMON scaleway boot=local nbd.max_part=16", + "default": true, + "zone": "nl-ams-1" + }, + "security_group": { + "id": "984414da-9fc2-49c0-a925-fed6266fe092", + "name": "Default security group" + }, + "location": { + "zone_id": "ams1", + "platform_id": "23", + "cluster_id": "19", + "hypervisor_id": "1201", + "node_id": "24" + }, + "maintenances": [], + "allowed_actions": [ + "poweroff", + "terminate", + "reboot", + "stop_in_place", + "backup" + ], + "placement_group": null, + "private_nics": [], + "zone": "nl-ams-1" } ] -} +} \ No newline at end of file diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 2f2e07a0c..a90defc78 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -2952,9 +2952,10 @@ The following meta labels are available on targets during [relabeling](#relabel_ * `__meta_scaleway_instance_type`: commercial type of the server * `__meta_scaleway_instance_zone`: the zone of the server (ex: `fr-par-1`, complete list [here](https://developers.scaleway.com/en/products/instance/api/#introduction)) -This role uses the private IPv4 address by default. This can be +This role uses the first address it finds in the following order: private IPv4, public IPv4, public IPv6. This can be changed with relabeling, as demonstrated in [the Prometheus scaleway-sd configuration file](/documentation/examples/prometheus-scaleway.yml). +Should an instance have no address before relabeling, it will not be added to the target list and you will not be able to relabel it. #### Baremetal role From 99f9d32499e6b80b550252b55d430dc7dd171dbb Mon Sep 17 00:00:00 2001 From: Neeraj Gartia <80708727+NeerajGartia21@users.noreply.github.com> Date: Mon, 29 Apr 2024 14:44:01 +0530 Subject: [PATCH 22/44] UTF-8: updates UI parser to support UTF-8 characters (#13590) Signed-off-by: Neeraj Gartia --- .../src/complete/hybrid.test.ts | 48 ++++++ .../codemirror-promql/src/complete/hybrid.ts | 35 ++++- .../codemirror-promql/src/parser/matcher.ts | 82 +++++++--- .../src/parser/parser.test.ts | 143 ++++++++++++++++++ .../codemirror-promql/src/parser/parser.ts | 18 ++- web/ui/module/lezer-promql/src/promql.grammar | 16 +- .../module/lezer-promql/test/expression.txt | 105 ++++++++++--- 7 files changed, 396 insertions(+), 51 deletions(-) diff --git a/web/ui/module/codemirror-promql/src/complete/hybrid.test.ts b/web/ui/module/codemirror-promql/src/complete/hybrid.test.ts index 0f1a8b80a..7b20bfce3 100644 --- a/web/ui/module/codemirror-promql/src/complete/hybrid.test.ts +++ b/web/ui/module/codemirror-promql/src/complete/hybrid.test.ts @@ -251,6 +251,12 @@ describe('analyzeCompletion test', () => { pos: 11, // cursor is between the bracket after the string myL expectedContext: [{ kind: ContextKind.LabelName }], }, + { + title: 'continue to autocomplete QuotedLabelName in aggregate modifier', + expr: 'sum by ("myL")', + pos: 12, // cursor is between the bracket after the string myL + expectedContext: [{ kind: ContextKind.LabelName }], + }, { title: 'autocomplete labelName in a list', expr: 'sum by (myLabel1,)', @@ -263,6 +269,12 @@ describe('analyzeCompletion test', () => { pos: 23, // cursor is between the bracket after the string myLab expectedContext: [{ kind: ContextKind.LabelName }], }, + { + title: 'autocomplete labelName in a list 2', + expr: 'sum by ("myLabel1", "myLab")', + pos: 27, // cursor is between the bracket after the string myLab + expectedContext: [{ kind: ContextKind.LabelName }], + }, { title: 'autocomplete labelName associated to a metric', expr: 'metric_name{}', @@ -299,6 +311,12 @@ describe('analyzeCompletion test', () => { pos: 22, // cursor is between the bracket after the comma expectedContext: [{ kind: ContextKind.LabelName, metricName: '' }], }, + { + title: 'continue to autocomplete quoted labelName associated to a metric', + expr: '{"metric_"}', + pos: 10, // cursor is between the bracket after the string metric_ + expectedContext: [{ kind: ContextKind.MetricName, metricName: 'metric_' }], + }, { title: 'autocomplete the labelValue with metricName + labelName', expr: 'metric_name{labelName=""}', @@ -342,6 +360,30 @@ describe('analyzeCompletion test', () => { }, ], }, + { + title: 'autocomplete the labelValue with metricName + quoted labelName', + expr: 'metric_name{labelName="labelValue", "labelName"!=""}', + pos: 50, // cursor is between the quotes + expectedContext: [ + { + kind: ContextKind.LabelValue, + metricName: 'metric_name', + labelName: 'labelName', + matchers: [ + { + name: 'labelName', + type: Neq, + value: '', + }, + { + name: 'labelName', + type: EqlSingle, + value: 'labelValue', + }, + ], + }, + ], + }, { title: 'autocomplete the labelValue associated to a labelName', expr: '{labelName=""}', @@ -427,6 +469,12 @@ describe('analyzeCompletion test', () => { pos: 22, // cursor is after '!' expectedContext: [{ kind: ContextKind.MatchOp }], }, + { + title: 'autocomplete matchOp 3', + expr: 'metric_name{"labelName"!}', + pos: 24, // cursor is after '!' + expectedContext: [{ kind: ContextKind.BinOp }], + }, { title: 'autocomplete duration with offset', expr: 'http_requests_total offset 5', diff --git a/web/ui/module/codemirror-promql/src/complete/hybrid.ts b/web/ui/module/codemirror-promql/src/complete/hybrid.ts index cf23aa11a..46748d5dc 100644 --- a/web/ui/module/codemirror-promql/src/complete/hybrid.ts +++ b/web/ui/module/codemirror-promql/src/complete/hybrid.ts @@ -29,7 +29,6 @@ import { GroupingLabels, Gte, Gtr, - LabelMatcher, LabelMatchers, LabelName, Lss, @@ -52,6 +51,9 @@ import { SubqueryExpr, Unless, VectorSelector, + UnquotedLabelMatcher, + QuotedLabelMatcher, + QuotedLabelName, } from '@prometheus-io/lezer-promql'; import { Completion, CompletionContext, CompletionResult } from '@codemirror/autocomplete'; import { EditorState } from '@codemirror/state'; @@ -181,7 +183,10 @@ export function computeStartCompletePosition(node: SyntaxNode, pos: number): num let start = node.from; if (node.type.id === LabelMatchers || node.type.id === GroupingLabels) { start = computeStartCompleteLabelPositionInLabelMatcherOrInGroupingLabel(node, pos); - } else if (node.type.id === FunctionCallBody || (node.type.id === StringLiteral && node.parent?.type.id === LabelMatcher)) { + } else if ( + node.type.id === FunctionCallBody || + (node.type.id === StringLiteral && (node.parent?.type.id === UnquotedLabelMatcher || node.parent?.type.id === QuotedLabelMatcher)) + ) { // When the cursor is between bracket, quote, we need to increment the starting position to avoid to consider the open bracket/ first string. start++; } else if ( @@ -212,7 +217,7 @@ export function analyzeCompletion(state: EditorState, node: SyntaxNode): Context result.push({ kind: ContextKind.Duration }); break; } - if (node.parent?.type.id === LabelMatcher) { + if (node.parent?.type.id === UnquotedLabelMatcher || node.parent?.type.id === QuotedLabelMatcher) { // In this case the current token is not itself a valid match op yet: // metric_name{labelName!} result.push({ kind: ContextKind.MatchOp }); @@ -380,7 +385,7 @@ export function analyzeCompletion(state: EditorState, node: SyntaxNode): Context // sum by (myL) // So we have to continue to autocomplete any kind of labelName result.push({ kind: ContextKind.LabelName }); - } else if (node.parent?.type.id === LabelMatcher) { + } else if (node.parent?.type.id === UnquotedLabelMatcher) { // In that case we are in the given situation: // metric_name{myL} or {myL} // so we have or to continue to autocomplete any kind of labelName or @@ -389,9 +394,9 @@ export function analyzeCompletion(state: EditorState, node: SyntaxNode): Context } break; case StringLiteral: - if (node.parent?.type.id === LabelMatcher) { + if (node.parent?.type.id === UnquotedLabelMatcher || node.parent?.type.id === QuotedLabelMatcher) { // In this case we are in the given situation: - // metric_name{labelName=""} + // metric_name{labelName=""} or metric_name{"labelName"=""} // So we can autocomplete the labelValue // Get the labelName. @@ -399,18 +404,34 @@ export function analyzeCompletion(state: EditorState, node: SyntaxNode): Context let labelName = ''; if (node.parent.firstChild?.type.id === LabelName) { labelName = state.sliceDoc(node.parent.firstChild.from, node.parent.firstChild.to); + } else if (node.parent.firstChild?.type.id === QuotedLabelName) { + labelName = state.sliceDoc(node.parent.firstChild.from, node.parent.firstChild.to).slice(1, -1); } // then find the metricName if it exists const metricName = getMetricNameInVectorSelector(node, state); // finally get the full matcher available const matcherNode = walkBackward(node, LabelMatchers); - const labelMatchers = buildLabelMatchers(matcherNode ? matcherNode.getChildren(LabelMatcher) : [], state); + const labelMatcherOpts = [QuotedLabelName, QuotedLabelMatcher, UnquotedLabelMatcher]; + let labelMatchers: Matcher[] = []; + for (const labelMatcherOpt of labelMatcherOpts) { + labelMatchers = labelMatchers.concat(buildLabelMatchers(matcherNode ? matcherNode.getChildren(labelMatcherOpt) : [], state)); + } result.push({ kind: ContextKind.LabelValue, metricName: metricName, labelName: labelName, matchers: labelMatchers, }); + } else if (node.parent?.parent?.type.id === GroupingLabels) { + // In this case we are in the given situation: + // sum by ("myL") + // So we have to continue to autocomplete any kind of labelName + result.push({ kind: ContextKind.LabelName }); + } else if (node.parent?.parent?.type.id === LabelMatchers) { + // In that case we are in the given situation: + // {""} or {"metric_"} + // since this is for the QuotedMetricName we need to continue to autocomplete for the metric names + result.push({ kind: ContextKind.MetricName, metricName: state.sliceDoc(node.from, node.to).slice(1, -1) }); } break; case NumberLiteral: diff --git a/web/ui/module/codemirror-promql/src/parser/matcher.ts b/web/ui/module/codemirror-promql/src/parser/matcher.ts index f432ffe28..99e2e3969 100644 --- a/web/ui/module/codemirror-promql/src/parser/matcher.ts +++ b/web/ui/module/codemirror-promql/src/parser/matcher.ts @@ -12,33 +12,75 @@ // limitations under the License. import { SyntaxNode } from '@lezer/common'; -import { EqlRegex, EqlSingle, LabelName, MatchOp, Neq, NeqRegex, StringLiteral } from '@prometheus-io/lezer-promql'; +import { + EqlRegex, + EqlSingle, + LabelName, + MatchOp, + Neq, + NeqRegex, + StringLiteral, + UnquotedLabelMatcher, + QuotedLabelMatcher, + QuotedLabelName, +} from '@prometheus-io/lezer-promql'; import { EditorState } from '@codemirror/state'; import { Matcher } from '../types'; function createMatcher(labelMatcher: SyntaxNode, state: EditorState): Matcher { const matcher = new Matcher(0, '', ''); const cursor = labelMatcher.cursor(); - if (!cursor.next()) { - // weird case, that would mean the labelMatcher doesn't have any child. - return matcher; - } - do { - switch (cursor.type.id) { - case LabelName: - matcher.name = state.sliceDoc(cursor.from, cursor.to); - break; - case MatchOp: - const ope = cursor.node.firstChild; - if (ope) { - matcher.type = ope.type.id; + switch (cursor.type.id) { + case QuotedLabelMatcher: + if (!cursor.next()) { + // weird case, that would mean the QuotedLabelMatcher doesn't have any child. + return matcher; + } + do { + switch (cursor.type.id) { + case QuotedLabelName: + matcher.name = state.sliceDoc(cursor.from, cursor.to).slice(1, -1); + break; + case MatchOp: + const ope = cursor.node.firstChild; + if (ope) { + matcher.type = ope.type.id; + } + break; + case StringLiteral: + matcher.value = state.sliceDoc(cursor.from, cursor.to).slice(1, -1); + break; } - break; - case StringLiteral: - matcher.value = state.sliceDoc(cursor.from, cursor.to).slice(1, -1); - break; - } - } while (cursor.nextSibling()); + } while (cursor.nextSibling()); + break; + case UnquotedLabelMatcher: + if (!cursor.next()) { + // weird case, that would mean the UnquotedLabelMatcher doesn't have any child. + return matcher; + } + do { + switch (cursor.type.id) { + case LabelName: + matcher.name = state.sliceDoc(cursor.from, cursor.to); + break; + case MatchOp: + const ope = cursor.node.firstChild; + if (ope) { + matcher.type = ope.type.id; + } + break; + case StringLiteral: + matcher.value = state.sliceDoc(cursor.from, cursor.to).slice(1, -1); + break; + } + } while (cursor.nextSibling()); + break; + case QuotedLabelName: + matcher.name = '__name__'; + matcher.value = state.sliceDoc(cursor.from, cursor.to).slice(1, -1); + matcher.type = EqlSingle; + break; + } return matcher; } diff --git a/web/ui/module/codemirror-promql/src/parser/parser.test.ts b/web/ui/module/codemirror-promql/src/parser/parser.test.ts index 54b95553c..2bc7e67ff 100644 --- a/web/ui/module/codemirror-promql/src/parser/parser.test.ts +++ b/web/ui/module/codemirror-promql/src/parser/parser.test.ts @@ -204,6 +204,11 @@ describe('promql operations', () => { expectedValueType: ValueType.vector, expectedDiag: [] as Diagnostic[], }, + { + expr: 'foo and on(test,"blub") bar', + expectedValueType: ValueType.vector, + expectedDiag: [] as Diagnostic[], + }, { expr: 'foo and on() bar', expectedValueType: ValueType.vector, @@ -214,6 +219,11 @@ describe('promql operations', () => { expectedValueType: ValueType.vector, expectedDiag: [] as Diagnostic[], }, + { + expr: 'foo and ignoring(test,"blub") bar', + expectedValueType: ValueType.vector, + expectedDiag: [] as Diagnostic[], + }, { expr: 'foo and ignoring() bar', expectedValueType: ValueType.vector, @@ -229,6 +239,11 @@ describe('promql operations', () => { expectedValueType: ValueType.vector, expectedDiag: [] as Diagnostic[], }, + { + expr: 'foo / on(test,blub) group_left("bar") bar', + expectedValueType: ValueType.vector, + expectedDiag: [] as Diagnostic[], + }, { expr: 'foo / ignoring(test,blub) group_left(blub) bar', expectedValueType: ValueType.vector, @@ -825,6 +840,134 @@ describe('promql operations', () => { expectedValueType: ValueType.vector, expectedDiag: [], }, + { + expr: '{"foo"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + // with metric name in the middle + expr: '{a="b","foo",c~="d"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"foo", a="bc"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"colon:in:the:middle"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"dot.in.the.middle"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"😀 in metric name"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + // quotes with escape + expr: '{"this is \"foo\" metric"}', // eslint-disable-line + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"foo","colon:in:the:middle"="val"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"foo","dot.in.the.middle"="val"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"foo","😀 in label name"="val"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + // quotes with escape + expr: '{"foo","this is \"bar\" label"="val"}', // eslint-disable-line + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: 'foo{"bar"}', + expectedValueType: ValueType.vector, + expectedDiag: [ + { + from: 0, + message: 'metric name must not be set twice: foo or bar', + severity: 'error', + to: 10, + }, + ], + }, + { + expr: '{"foo", __name__="bar"}', + expectedValueType: ValueType.vector, + expectedDiag: [ + { + from: 0, + message: 'metric name must not be set twice: foo or bar', + severity: 'error', + to: 23, + }, + ], + }, + { + expr: '{"foo", "__name__"="bar"}', + expectedValueType: ValueType.vector, + expectedDiag: [ + { + from: 0, + message: 'metric name must not be set twice: foo or bar', + severity: 'error', + to: 25, + }, + ], + }, + { + expr: '{"__name__"="foo", __name__="bar"}', + expectedValueType: ValueType.vector, + expectedDiag: [ + { + from: 0, + message: 'metric name must not be set twice: foo or bar', + severity: 'error', + to: 34, + }, + ], + }, + { + expr: '{"foo", "bar"}', + expectedValueType: ValueType.vector, + expectedDiag: [ + { + from: 0, + to: 14, + message: 'metric name must not be set twice: foo or bar', + severity: 'error', + }, + ], + }, + { + expr: `{'foo\`metric':'bar'}`, // eslint-disable-line + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{`foo\"metric`=`bar`}', // eslint-disable-line + expectedValueType: ValueType.vector, + expectedDiag: [], + }, ]; testCases.forEach((value) => { const state = createEditorState(value.expr); diff --git a/web/ui/module/codemirror-promql/src/parser/parser.ts b/web/ui/module/codemirror-promql/src/parser/parser.ts index 58e56185c..fba7b7b6b 100644 --- a/web/ui/module/codemirror-promql/src/parser/parser.ts +++ b/web/ui/module/codemirror-promql/src/parser/parser.ts @@ -27,7 +27,6 @@ import { Gte, Gtr, Identifier, - LabelMatcher, LabelMatchers, Lss, Lte, @@ -36,11 +35,14 @@ import { Or, ParenExpr, Quantile, + QuotedLabelMatcher, + QuotedLabelName, StepInvariantExpr, SubqueryExpr, Topk, UnaryExpr, Unless, + UnquotedLabelMatcher, VectorSelector, } from '@prometheus-io/lezer-promql'; import { containsAtLeastOneChild } from './path-finder'; @@ -282,7 +284,11 @@ export class Parser { private checkVectorSelector(node: SyntaxNode): void { const matchList = node.getChild(LabelMatchers); - const labelMatchers = buildLabelMatchers(matchList ? matchList.getChildren(LabelMatcher) : [], this.state); + const labelMatcherOpts = [QuotedLabelName, QuotedLabelMatcher, UnquotedLabelMatcher]; + let labelMatchers: Matcher[] = []; + for (const labelMatcherOpt of labelMatcherOpts) { + labelMatchers = labelMatchers.concat(buildLabelMatchers(matchList ? matchList.getChildren(labelMatcherOpt) : [], this.state)); + } let vectorSelectorName = ''; // VectorSelector ( Identifier ) // https://github.com/promlabs/lezer-promql/blob/71e2f9fa5ae6f5c5547d5738966cd2512e6b99a8/src/promql.grammar#L200 @@ -301,6 +307,14 @@ export class Parser { // adding the metric name as a Matcher to avoid a false positive for this kind of expression: // foo{bare=''} labelMatchers.push(new Matcher(EqlSingle, '__name__', vectorSelectorName)); + } else { + // In this case when metric name is not set outside the braces + // It is checking whether metric name is set twice like in : + // {__name__:"foo", "foo"}, {"foo", "bar"} + const labelMatchersMetricName = labelMatchers.filter((lm) => lm.name === '__name__'); + if (labelMatchersMetricName.length > 1) { + this.addDiagnostic(node, `metric name must not be set twice: ${labelMatchersMetricName[0].value} or ${labelMatchersMetricName[1].value}`); + } } // A Vector selector must contain at least one non-empty matcher to prevent diff --git a/web/ui/module/lezer-promql/src/promql.grammar b/web/ui/module/lezer-promql/src/promql.grammar index 496648317..fd4edddf2 100644 --- a/web/ui/module/lezer-promql/src/promql.grammar +++ b/web/ui/module/lezer-promql/src/promql.grammar @@ -97,7 +97,7 @@ binModifiers { } GroupingLabels { - "(" (LabelName ("," LabelName)* ","?)? ")" + "(" ((LabelName | QuotedLabelName) ("," (LabelName | QuotedLabelName))* ","?)? ")" } FunctionCall { @@ -220,7 +220,7 @@ VectorSelector { } LabelMatchers { - "{" (LabelMatcher ("," LabelMatcher)* ","?)? "}" + "{" ((UnquotedLabelMatcher | QuotedLabelMatcher | QuotedLabelName)("," (UnquotedLabelMatcher | QuotedLabelMatcher | QuotedLabelName))* ","?)? "}" } MatchOp { @@ -230,8 +230,16 @@ MatchOp { NeqRegex } -LabelMatcher { - LabelName MatchOp StringLiteral +UnquotedLabelMatcher { + LabelName MatchOp StringLiteral +} + +QuotedLabelMatcher { + QuotedLabelName MatchOp StringLiteral +} + +QuotedLabelName { + StringLiteral } StepInvariantExpr { diff --git a/web/ui/module/lezer-promql/test/expression.txt b/web/ui/module/lezer-promql/test/expression.txt index 2e2b2f40b..daba7d800 100644 --- a/web/ui/module/lezer-promql/test/expression.txt +++ b/web/ui/module/lezer-promql/test/expression.txt @@ -112,6 +112,54 @@ PromQL( ) ) +# Quoted label name in grouping labels + +sum by("job", mode) (test_metric) / on("job") group_left sum by("job")(test_metric) + +==> + +PromQL( + BinaryExpr( + AggregateExpr( + AggregateOp(Sum), + AggregateModifier( + By, + GroupingLabels( + QuotedLabelName(StringLiteral), + LabelName + ) + ), + FunctionCallBody( + VectorSelector( + Identifier + ) + ) + ), + Div, + MatchingModifierClause( + On, + GroupingLabels( + QuotedLabelName(StringLiteral) + ) + GroupLeft + ), + AggregateExpr( + AggregateOp(Sum), + AggregateModifier( + By, + GroupingLabels( + QuotedLabelName(StringLiteral) + ) + ), + FunctionCallBody( + VectorSelector( + Identifier + ) + ) + ) + ) +) + # Case insensitivity for aggregations and binop modifiers. SuM BY(testlabel1) (testmetric1) / IGNOring(testlabel2) AVG withOUT(testlabel3) (testmetric2) @@ -226,25 +274,25 @@ PromQL( VectorSelector( Identifier, LabelMatchers( - LabelMatcher( - LabelName, - MatchOp(EqlSingle), - StringLiteral + UnquotedLabelMatcher( + LabelName, + MatchOp(EqlSingle), + StringLiteral ), - LabelMatcher( - LabelName, - MatchOp(Neq), - StringLiteral + UnquotedLabelMatcher( + LabelName, + MatchOp(Neq), + StringLiteral ), - LabelMatcher( - LabelName, - MatchOp(EqlRegex), - StringLiteral + UnquotedLabelMatcher( + LabelName, + MatchOp(EqlRegex), + StringLiteral ), - LabelMatcher( - LabelName, - MatchOp(NeqRegex), - StringLiteral + UnquotedLabelMatcher( + LabelName, + MatchOp(NeqRegex), + StringLiteral ) ) ) @@ -571,14 +619,14 @@ PromQL(NumberLiteral) NaN{foo="bar"} ==> -PromQL(BinaryExpr(NumberLiteral,⚠,VectorSelector(LabelMatchers(LabelMatcher(LabelName,MatchOp(EqlSingle),StringLiteral))))) +PromQL(BinaryExpr(NumberLiteral,⚠,VectorSelector(LabelMatchers(UnquotedLabelMatcher(LabelName,MatchOp(EqlSingle),StringLiteral))))) # Trying to illegally use Inf as a metric name. Inf{foo="bar"} ==> -PromQL(BinaryExpr(NumberLiteral,⚠,VectorSelector(LabelMatchers(LabelMatcher(LabelName,MatchOp(EqlSingle),StringLiteral))))) +PromQL(BinaryExpr(NumberLiteral,⚠,VectorSelector(LabelMatchers(UnquotedLabelMatcher(LabelName,MatchOp(EqlSingle),StringLiteral))))) # Negative offset @@ -614,3 +662,24 @@ MetricName(Identifier) ==> PromQL(BinaryExpr(NumberLiteral,Add,BinaryExpr(VectorSelector(Identifier),Atan2,VectorSelector(Identifier)))) + +# Testing quoted metric name + +{"metric_name"} + +==> +PromQL(VectorSelector(LabelMatchers(QuotedLabelName(StringLiteral)))) + +# Testing quoted label name + +{"foo"="bar"} + +==> +PromQL(VectorSelector(LabelMatchers(QuotedLabelMatcher(QuotedLabelName(StringLiteral), MatchOp(EqlSingle), StringLiteral)))) + +# Testing quoted metric name and label name + +{"metric_name", "foo"="bar"} + +==> +PromQL(VectorSelector(LabelMatchers(QuotedLabelName(StringLiteral), QuotedLabelMatcher(QuotedLabelName(StringLiteral), MatchOp(EqlSingle), StringLiteral)))) \ No newline at end of file From b974a9927923bf40dc580556b7d0fc3db86260a0 Mon Sep 17 00:00:00 2001 From: komisan19 <18901496+komisan19@users.noreply.github.com> Date: Tue, 30 Apr 2024 10:45:20 +0900 Subject: [PATCH 23/44] fix Signed-off-by: komisan19 <18901496+komisan19@users.noreply.github.com> --- .../otlptranslator/prometheusremotewrite/helper.go | 14 +++++--------- .../prometheusremotewrite/metrics_to_prw.go | 2 +- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go index da4ca07d8..d9d80cdc7 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -415,36 +415,32 @@ func mostRecentTimestampInMetric(metric pmetric.Metric) pcommon.Timestamp { case pmetric.MetricTypeGauge: dataPoints := metric.Gauge().DataPoints() for x := 0; x < dataPoints.Len(); x++ { - ts = maxTimestamp(ts, dataPoints.At(x).Timestamp()) + ts = max(ts, dataPoints.At(x).Timestamp()) } case pmetric.MetricTypeSum: dataPoints := metric.Sum().DataPoints() for x := 0; x < dataPoints.Len(); x++ { - ts = maxTimestamp(ts, dataPoints.At(x).Timestamp()) + ts = max(ts, dataPoints.At(x).Timestamp()) } case pmetric.MetricTypeHistogram: dataPoints := metric.Histogram().DataPoints() for x := 0; x < dataPoints.Len(); x++ { - ts = maxTimestamp(ts, dataPoints.At(x).Timestamp()) + ts = max(ts, dataPoints.At(x).Timestamp()) } case pmetric.MetricTypeExponentialHistogram: dataPoints := metric.ExponentialHistogram().DataPoints() for x := 0; x < dataPoints.Len(); x++ { - ts = maxTimestamp(ts, dataPoints.At(x).Timestamp()) + ts = max(ts, dataPoints.At(x).Timestamp()) } case pmetric.MetricTypeSummary: dataPoints := metric.Summary().DataPoints() for x := 0; x < dataPoints.Len(); x++ { - ts = maxTimestamp(ts, dataPoints.At(x).Timestamp()) + ts = max(ts, dataPoints.At(x).Timestamp()) } } return ts } -func maxTimestamp(a, b pcommon.Timestamp) pcommon.Timestamp { - return max(a, b) -} - // addSingleSummaryDataPoint converts pt to len(QuantileValues) + 2 samples. func addSingleSummaryDataPoint(pt pmetric.SummaryDataPoint, resource pcommon.Resource, metric pmetric.Metric, settings Settings, tsMap map[string]*prompb.TimeSeries, baseName string) { diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go index fb141034a..7d51b9ee2 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go @@ -45,7 +45,7 @@ func FromMetrics(md pmetric.Metrics, settings Settings) (tsMap map[string]*promp // TODO: decide if instrumentation library information should be exported as labels for k := 0; k < metricSlice.Len(); k++ { metric := metricSlice.At(k) - mostRecentTimestamp = maxTimestamp(mostRecentTimestamp, mostRecentTimestampInMetric(metric)) + mostRecentTimestamp = max(mostRecentTimestamp, mostRecentTimestampInMetric(metric)) if !isValidAggregationTemporality(metric) { errs = multierr.Append(errs, fmt.Errorf("invalid temporality and type combination for metric %q", metric.Name())) From 9fda9443d492aaaa5de31e6e1e9bba085be2fc30 Mon Sep 17 00:00:00 2001 From: guangwu Date: Tue, 30 Apr 2024 16:47:10 +0800 Subject: [PATCH 24/44] fix(promql/query_logger): close file in error handling (#13948) Signed-off-by: guoguangwu --- promql/query_logger.go | 2 ++ promql/query_logger_test.go | 5 +---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/promql/query_logger.go b/promql/query_logger.go index fa4e1fb07..7ddd8c2d5 100644 --- a/promql/query_logger.go +++ b/promql/query_logger.go @@ -96,12 +96,14 @@ func getMMapedFile(filename string, filesize int, logger log.Logger) ([]byte, io err = file.Truncate(int64(filesize)) if err != nil { + file.Close() level.Error(logger).Log("msg", "Error setting filesize.", "filesize", filesize, "err", err) return nil, nil, err } fileAsBytes, err := mmap.Map(file, mmap.RDWR, 0) if err != nil { + file.Close() level.Error(logger).Log("msg", "Failed to mmap", "file", filename, "Attempted size", filesize, "err", err) return nil, nil, err } diff --git a/promql/query_logger_test.go b/promql/query_logger_test.go index 4135753fd..376d61b64 100644 --- a/promql/query_logger_test.go +++ b/promql/query_logger_test.go @@ -110,10 +110,7 @@ func TestMMapFile(t *testing.T) { filename := file.Name() defer os.Remove(filename) - fileAsBytes, closer, err := getMMapedFile(filename, 2, nil) - if err != nil { - t.Cleanup(func() { closer.Close() }) - } + fileAsBytes, _, err := getMMapedFile(filename, 2, nil) require.NoError(t, err) copy(fileAsBytes, "ab") From 7554384dac12ed8cb637161843eca0534662d730 Mon Sep 17 00:00:00 2001 From: Jesus Vazquez Date: Tue, 30 Apr 2024 11:29:52 +0200 Subject: [PATCH 25/44] otlp: Prometheus to own its own copy of the otlptranslator package (#13991) After a lot of productive discussion between the Prometheus and OpenTelemetry community we decided that it made sense for Prometheus to own its own copy of the code in charge for handling OTLP ingestion traffic. This commit is removing the README and update-copy.sh files that had the previous steps to update the code. Also it is updating the licensing of all the files to make sure the OpenTelemetry provenance is explicit and to state the new ownership. Signed-off-by: Jesus Vazquez Co-authored-by: Arve Knudsen --- storage/remote/otlptranslator/README.md | 22 - .../prometheus/normalize_label.go | 21 +- .../prometheus/normalize_name.go | 21 +- .../otlptranslator/prometheus/unit_to_ucum.go | 21 +- .../prometheusremotewrite/helper.go | 536 +++++++++--------- .../prometheusremotewrite/histograms.go | 77 +-- .../prometheusremotewrite/metrics_to_prw.go | 168 ++++-- .../number_data_points.go | 178 +++--- .../otlp_to_openmetrics_metadata.go | 23 +- storage/remote/otlptranslator/update-copy.sh | 27 - 10 files changed, 596 insertions(+), 498 deletions(-) delete mode 100644 storage/remote/otlptranslator/README.md delete mode 100755 storage/remote/otlptranslator/update-copy.sh diff --git a/storage/remote/otlptranslator/README.md b/storage/remote/otlptranslator/README.md deleted file mode 100644 index 774fac5a7..000000000 --- a/storage/remote/otlptranslator/README.md +++ /dev/null @@ -1,22 +0,0 @@ -## Copying from opentelemetry/opentelemetry-collector-contrib - -This files in the `prometheus/` and `prometheusremotewrite/` are copied from the OpenTelemetry Project[^1]. - -This is done instead of adding a go.mod dependency because OpenTelemetry depends on `prometheus/prometheus` and a cyclic dependency will be created. This is just a temporary solution and the long-term solution is to move the required packages from OpenTelemetry into `prometheus/prometheus`. - -To update the dependency is a multi-step process: -1. Vendor the latest `prometheus/prometheus`@`main` into [`opentelemetry/opentelemetry-collector-contrib`](https://github.com/open-telemetry/opentelemetry-collector-contrib) -1. Update the VERSION in `update-copy.sh`. -1. Run `./update-copy.sh`. - -### Why copy? - -This is because the packages we copy depend on the [`prompb`](https://github.com/prometheus/prometheus/blob/main/prompb) package. While the package is relatively stable, there are still changes. For example, https://github.com/prometheus/prometheus/pull/11935 changed the types. -This means if we depend on the upstream packages directly, we will never able to make the changes like above. Hence we're copying the code for now. - -### I need to manually change these files - -When we do want to make changes to the types in `prompb`, we might need to edit the files directly. That is OK, please let @gouthamve or @jesusvazquez know so they can take care of updating the upstream code (by vendoring in `prometheus/prometheus` upstream and resolving conflicts) and then will run the copy -script again to keep things updated. - -[^1]: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/pkg/translator/prometheus and https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/pkg/translator/prometheusremotewrite diff --git a/storage/remote/otlptranslator/prometheus/normalize_label.go b/storage/remote/otlptranslator/prometheus/normalize_label.go index a6b41d1c3..4f9942bd1 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_label.go +++ b/storage/remote/otlptranslator/prometheus/normalize_label.go @@ -1,9 +1,20 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheus/normalize_label.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheus // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" +package prometheus import ( "strings" diff --git a/storage/remote/otlptranslator/prometheus/normalize_name.go b/storage/remote/otlptranslator/prometheus/normalize_name.go index a976dfb48..6cb4fc199 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name.go @@ -1,9 +1,20 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheus/normalize_name.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheus // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" +package prometheus import ( "strings" diff --git a/storage/remote/otlptranslator/prometheus/unit_to_ucum.go b/storage/remote/otlptranslator/prometheus/unit_to_ucum.go index 718a52067..1f8bf1a63 100644 --- a/storage/remote/otlptranslator/prometheus/unit_to_ucum.go +++ b/storage/remote/otlptranslator/prometheus/unit_to_ucum.go @@ -1,9 +1,20 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheus/unit_to_ucum.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheus // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" +package prometheus import "strings" diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go index 817cbaba7..eea12b512 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -1,29 +1,42 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/helper.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheusremotewrite // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite" +package prometheusremotewrite import ( "encoding/hex" "fmt" "log" "math" + "slices" "sort" "strconv" - "strings" "time" "unicode/utf8" + "github.com/cespare/xxhash/v2" "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/timestamp" - "github.com/prometheus/prometheus/model/value" - "github.com/prometheus/prometheus/prompb" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" conventions "go.opentelemetry.io/collector/semconv/v1.6.1" + "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/prompb" + prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" ) @@ -48,7 +61,7 @@ const ( ) type bucketBoundsData struct { - sig string + ts *prompb.TimeSeries bound float64 } @@ -66,94 +79,47 @@ func (a ByLabelName) Len() int { return len(a) } func (a ByLabelName) Less(i, j int) bool { return a[i].Name < a[j].Name } func (a ByLabelName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -// addSample finds a TimeSeries in tsMap that corresponds to the label set labels, and add sample to the TimeSeries; it -// creates a new TimeSeries in the map if not found and returns the time series signature. -// tsMap will be unmodified if either labels or sample is nil, but can still be modified if the exemplar is nil. -func addSample(tsMap map[string]*prompb.TimeSeries, sample *prompb.Sample, labels []prompb.Label, - datatype string) string { - if sample == nil || labels == nil || tsMap == nil { - // This shouldn't happen - return "" - } - - sig := timeSeriesSignature(datatype, labels) - ts := tsMap[sig] - if ts != nil { - ts.Samples = append(ts.Samples, *sample) - } else { - newTs := &prompb.TimeSeries{ - Labels: labels, - Samples: []prompb.Sample{*sample}, - } - tsMap[sig] = newTs - } - - return sig -} - -// addExemplars finds a bucket bound that corresponds to the exemplars value and add the exemplar to the specific sig; -// we only add exemplars if samples are presents -// tsMap is unmodified if either of its parameters is nil and samples are nil. -func addExemplars(tsMap map[string]*prompb.TimeSeries, exemplars []prompb.Exemplar, bucketBoundsData []bucketBoundsData) { - if len(tsMap) == 0 || len(bucketBoundsData) == 0 || len(exemplars) == 0 { - return - } - - sort.Sort(byBucketBoundsData(bucketBoundsData)) - - for _, exemplar := range exemplars { - addExemplar(tsMap, bucketBoundsData, exemplar) - } -} - -func addExemplar(tsMap map[string]*prompb.TimeSeries, bucketBounds []bucketBoundsData, exemplar prompb.Exemplar) { - for _, bucketBound := range bucketBounds { - sig := bucketBound.sig - bound := bucketBound.bound - - ts := tsMap[sig] - if ts != nil && len(ts.Samples) > 0 && exemplar.Value <= bound { - ts.Exemplars = append(ts.Exemplars, exemplar) - return - } - } -} - -// timeSeries return a string signature in the form of: -// -// TYPE-label1-value1- ... -labelN-valueN -// -// the label slice should not contain duplicate label names; this method sorts the slice by label name before creating +// timeSeriesSignature returns a hashed label set signature. +// The label slice should not contain duplicate label names; this method sorts the slice by label name before creating // the signature. -func timeSeriesSignature(datatype string, labels []prompb.Label) string { - length := len(datatype) - - for _, lb := range labels { - length += 2 + len(lb.GetName()) + len(lb.GetValue()) - } - - b := strings.Builder{} - b.Grow(length) - b.WriteString(datatype) - +// The algorithm is the same as in Prometheus' labels.StableHash function. +func timeSeriesSignature(labels []prompb.Label) uint64 { sort.Sort(ByLabelName(labels)) - for _, lb := range labels { - b.WriteString("-") - b.WriteString(lb.GetName()) - b.WriteString("-") - b.WriteString(lb.GetValue()) - } + // Use xxhash.Sum64(b) for fast path as it's faster. + b := make([]byte, 0, 1024) + for i, v := range labels { + if len(b)+len(v.Name)+len(v.Value)+2 >= cap(b) { + // If labels entry is 1KB+ do not allocate whole entry. + h := xxhash.New() + _, _ = h.Write(b) + for _, v := range labels[i:] { + _, _ = h.WriteString(v.Name) + _, _ = h.Write(seps) + _, _ = h.WriteString(v.Value) + _, _ = h.Write(seps) + } + return h.Sum64() + } - return b.String() + b = append(b, v.Name...) + b = append(b, seps[0]) + b = append(b, v.Value...) + b = append(b, seps[0]) + } + return xxhash.Sum64(b) } +var seps = []byte{'\xff'} + // createAttributes creates a slice of Prometheus Labels with OTLP attributes and pairs of string values. -// Unpaired string values are ignored. String pairs overwrite OTLP labels if collisions happen, and overwrites are -// logged. Resulting label names are sanitized. -func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externalLabels map[string]string, extras ...string) []prompb.Label { - serviceName, haveServiceName := resource.Attributes().Get(conventions.AttributeServiceName) - instance, haveInstanceID := resource.Attributes().Get(conventions.AttributeServiceInstanceID) +// Unpaired string values are ignored. String pairs overwrite OTLP labels if collisions happen and +// if logOnOverwrite is true, the overwrite is logged. Resulting label names are sanitized. +func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externalLabels map[string]string, + ignoreAttrs []string, logOnOverwrite bool, extras ...string) []prompb.Label { + resourceAttrs := resource.Attributes() + serviceName, haveServiceName := resourceAttrs.Get(conventions.AttributeServiceName) + instance, haveInstanceID := resourceAttrs.Get(conventions.AttributeServiceInstanceID) // Calculate the maximum possible number of labels we could return so we can preallocate l maxLabelCount := attributes.Len() + len(externalLabels) + len(extras)/2 @@ -171,9 +137,13 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externa // Ensure attributes are sorted by key for consistent merging of keys which // collide when sanitized. - labels := make([]prompb.Label, 0, attributes.Len()) + labels := make([]prompb.Label, 0, maxLabelCount) + // XXX: Should we always drop service namespace/service name/service instance ID from the labels + // (as they get mapped to other Prometheus labels)? attributes.Range(func(key string, value pcommon.Value) bool { - labels = append(labels, prompb.Label{Name: key, Value: value.AsString()}) + if !slices.Contains(ignoreAttrs, key) { + labels = append(labels, prompb.Label{Name: key, Value: value.AsString()}) + } return true }) sort.Stable(ByLabelName(labels)) @@ -190,7 +160,7 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externa // Map service.name + service.namespace to job if haveServiceName { val := serviceName.AsString() - if serviceNamespace, ok := resource.Attributes().Get(conventions.AttributeServiceNamespace); ok { + if serviceNamespace, ok := resourceAttrs.Get(conventions.AttributeServiceNamespace); ok { val = fmt.Sprintf("%s/%s", serviceNamespace.AsString(), val) } l[model.JobLabel] = val @@ -213,7 +183,7 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externa break } _, found := l[extras[i]] - if found { + if found && logOnOverwrite { log.Println("label " + extras[i] + " is overwritten. Check if Prometheus reserved labels are used.") } // internal labels should be maintained @@ -224,12 +194,12 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externa l[name] = extras[i+1] } - s := make([]prompb.Label, 0, len(l)) + labels = labels[:0] for k, v := range l { - s = append(s, prompb.Label{Name: k, Value: v}) + labels = append(labels, prompb.Label{Name: k, Value: v}) } - return s + return labels } // isValidAggregationTemporality checks whether an OTel metric has a valid @@ -249,100 +219,84 @@ func isValidAggregationTemporality(metric pmetric.Metric) bool { return false } -// addSingleHistogramDataPoint converts pt to 2 + min(len(ExplicitBounds), len(BucketCount)) + 1 samples. It -// ignore extra buckets if len(ExplicitBounds) > len(BucketCounts) -func addSingleHistogramDataPoint(pt pmetric.HistogramDataPoint, resource pcommon.Resource, metric pmetric.Metric, settings Settings, tsMap map[string]*prompb.TimeSeries, baseName string) { - timestamp := convertTimeStamp(pt.Timestamp()) - baseLabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels) +func (c *prometheusConverter) addHistogramDataPoints(dataPoints pmetric.HistogramDataPointSlice, + resource pcommon.Resource, settings Settings, baseName string) { + for x := 0; x < dataPoints.Len(); x++ { + pt := dataPoints.At(x) + timestamp := convertTimeStamp(pt.Timestamp()) + baseLabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels, nil, false) - createLabels := func(nameSuffix string, extras ...string) []prompb.Label { - extraLabelCount := len(extras) / 2 - labels := make([]prompb.Label, len(baseLabels), len(baseLabels)+extraLabelCount+1) // +1 for name - copy(labels, baseLabels) + // If the sum is unset, it indicates the _sum metric point should be + // omitted + if pt.HasSum() { + // treat sum as a sample in an individual TimeSeries + sum := &prompb.Sample{ + Value: pt.Sum(), + Timestamp: timestamp, + } + if pt.Flags().NoRecordedValue() { + sum.Value = math.Float64frombits(value.StaleNaN) + } + + sumlabels := createLabels(baseName+sumStr, baseLabels) + c.addSample(sum, sumlabels) - for extrasIdx := 0; extrasIdx < extraLabelCount; extrasIdx++ { - labels = append(labels, prompb.Label{Name: extras[extrasIdx], Value: extras[extrasIdx+1]}) } - // sum, count, and buckets of the histogram should append suffix to baseName - labels = append(labels, prompb.Label{Name: model.MetricNameLabel, Value: baseName + nameSuffix}) - - return labels - } - - // If the sum is unset, it indicates the _sum metric point should be - // omitted - if pt.HasSum() { - // treat sum as a sample in an individual TimeSeries - sum := &prompb.Sample{ - Value: pt.Sum(), + // treat count as a sample in an individual TimeSeries + count := &prompb.Sample{ + Value: float64(pt.Count()), Timestamp: timestamp, } if pt.Flags().NoRecordedValue() { - sum.Value = math.Float64frombits(value.StaleNaN) + count.Value = math.Float64frombits(value.StaleNaN) } - sumlabels := createLabels(sumStr) - addSample(tsMap, sum, sumlabels, metric.Type().String()) + countlabels := createLabels(baseName+countStr, baseLabels) + c.addSample(count, countlabels) - } + // cumulative count for conversion to cumulative histogram + var cumulativeCount uint64 - // treat count as a sample in an individual TimeSeries - count := &prompb.Sample{ - Value: float64(pt.Count()), - Timestamp: timestamp, - } - if pt.Flags().NoRecordedValue() { - count.Value = math.Float64frombits(value.StaleNaN) - } + var bucketBounds []bucketBoundsData - countlabels := createLabels(countStr) - addSample(tsMap, count, countlabels, metric.Type().String()) + // process each bound, based on histograms proto definition, # of buckets = # of explicit bounds + 1 + for i := 0; i < pt.ExplicitBounds().Len() && i < pt.BucketCounts().Len(); i++ { + bound := pt.ExplicitBounds().At(i) + cumulativeCount += pt.BucketCounts().At(i) + bucket := &prompb.Sample{ + Value: float64(cumulativeCount), + Timestamp: timestamp, + } + if pt.Flags().NoRecordedValue() { + bucket.Value = math.Float64frombits(value.StaleNaN) + } + boundStr := strconv.FormatFloat(bound, 'f', -1, 64) + labels := createLabels(baseName+bucketStr, baseLabels, leStr, boundStr) + ts := c.addSample(bucket, labels) - // cumulative count for conversion to cumulative histogram - var cumulativeCount uint64 - - promExemplars := getPromExemplars[pmetric.HistogramDataPoint](pt) - - var bucketBounds []bucketBoundsData - - // process each bound, based on histograms proto definition, # of buckets = # of explicit bounds + 1 - for i := 0; i < pt.ExplicitBounds().Len() && i < pt.BucketCounts().Len(); i++ { - bound := pt.ExplicitBounds().At(i) - cumulativeCount += pt.BucketCounts().At(i) - bucket := &prompb.Sample{ - Value: float64(cumulativeCount), + bucketBounds = append(bucketBounds, bucketBoundsData{ts: ts, bound: bound}) + } + // add le=+Inf bucket + infBucket := &prompb.Sample{ Timestamp: timestamp, } if pt.Flags().NoRecordedValue() { - bucket.Value = math.Float64frombits(value.StaleNaN) + infBucket.Value = math.Float64frombits(value.StaleNaN) + } else { + infBucket.Value = float64(pt.Count()) } - boundStr := strconv.FormatFloat(bound, 'f', -1, 64) - labels := createLabels(bucketStr, leStr, boundStr) - sig := addSample(tsMap, bucket, labels, metric.Type().String()) + infLabels := createLabels(baseName+bucketStr, baseLabels, leStr, pInfStr) + ts := c.addSample(infBucket, infLabels) - bucketBounds = append(bucketBounds, bucketBoundsData{sig: sig, bound: bound}) - } - // add le=+Inf bucket - infBucket := &prompb.Sample{ - Timestamp: timestamp, - } - if pt.Flags().NoRecordedValue() { - infBucket.Value = math.Float64frombits(value.StaleNaN) - } else { - infBucket.Value = float64(pt.Count()) - } - infLabels := createLabels(bucketStr, leStr, pInfStr) - sig := addSample(tsMap, infBucket, infLabels, metric.Type().String()) + bucketBounds = append(bucketBounds, bucketBoundsData{ts: ts, bound: math.Inf(1)}) + c.addExemplars(pt, bucketBounds) - bucketBounds = append(bucketBounds, bucketBoundsData{sig: sig, bound: math.Inf(1)}) - addExemplars(tsMap, promExemplars, bucketBounds) - - // add _created time series if needed - startTimestamp := pt.StartTimestamp() - if settings.ExportCreatedMetric && startTimestamp != 0 { - labels := createLabels(createdSuffix) - addCreatedTimeSeriesIfNeeded(tsMap, labels, startTimestamp, pt.Timestamp(), metric.Type().String()) + startTimestamp := pt.StartTimestamp() + if settings.ExportCreatedMetric && startTimestamp != 0 { + labels := createLabels(baseName+createdSuffix, baseLabels) + c.addTimeSeriesIfNeeded(labels, startTimestamp, pt.Timestamp()) + } } } @@ -448,129 +402,177 @@ func maxTimestamp(a, b pcommon.Timestamp) pcommon.Timestamp { return b } -// addSingleSummaryDataPoint converts pt to len(QuantileValues) + 2 samples. -func addSingleSummaryDataPoint(pt pmetric.SummaryDataPoint, resource pcommon.Resource, metric pmetric.Metric, settings Settings, - tsMap map[string]*prompb.TimeSeries, baseName string) { - timestamp := convertTimeStamp(pt.Timestamp()) - baseLabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels) +func (c *prometheusConverter) addSummaryDataPoints(dataPoints pmetric.SummaryDataPointSlice, resource pcommon.Resource, + settings Settings, baseName string) { + for x := 0; x < dataPoints.Len(); x++ { + pt := dataPoints.At(x) + timestamp := convertTimeStamp(pt.Timestamp()) + baseLabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels, nil, false) - createLabels := func(name string, extras ...string) []prompb.Label { - extraLabelCount := len(extras) / 2 - labels := make([]prompb.Label, len(baseLabels), len(baseLabels)+extraLabelCount+1) // +1 for name - copy(labels, baseLabels) - - for extrasIdx := 0; extrasIdx < extraLabelCount; extrasIdx++ { - labels = append(labels, prompb.Label{Name: extras[extrasIdx], Value: extras[extrasIdx+1]}) - } - - labels = append(labels, prompb.Label{Name: model.MetricNameLabel, Value: name}) - - return labels - } - - // treat sum as a sample in an individual TimeSeries - sum := &prompb.Sample{ - Value: pt.Sum(), - Timestamp: timestamp, - } - if pt.Flags().NoRecordedValue() { - sum.Value = math.Float64frombits(value.StaleNaN) - } - // sum and count of the summary should append suffix to baseName - sumlabels := createLabels(baseName + sumStr) - addSample(tsMap, sum, sumlabels, metric.Type().String()) - - // treat count as a sample in an individual TimeSeries - count := &prompb.Sample{ - Value: float64(pt.Count()), - Timestamp: timestamp, - } - if pt.Flags().NoRecordedValue() { - count.Value = math.Float64frombits(value.StaleNaN) - } - countlabels := createLabels(baseName + countStr) - addSample(tsMap, count, countlabels, metric.Type().String()) - - // process each percentile/quantile - for i := 0; i < pt.QuantileValues().Len(); i++ { - qt := pt.QuantileValues().At(i) - quantile := &prompb.Sample{ - Value: qt.Value(), + // treat sum as a sample in an individual TimeSeries + sum := &prompb.Sample{ + Value: pt.Sum(), Timestamp: timestamp, } if pt.Flags().NoRecordedValue() { - quantile.Value = math.Float64frombits(value.StaleNaN) + sum.Value = math.Float64frombits(value.StaleNaN) } - percentileStr := strconv.FormatFloat(qt.Quantile(), 'f', -1, 64) - qtlabels := createLabels(baseName, quantileStr, percentileStr) - addSample(tsMap, quantile, qtlabels, metric.Type().String()) - } + // sum and count of the summary should append suffix to baseName + sumlabels := createLabels(baseName+sumStr, baseLabels) + c.addSample(sum, sumlabels) - // add _created time series if needed - startTimestamp := pt.StartTimestamp() - if settings.ExportCreatedMetric && startTimestamp != 0 { - createdLabels := createLabels(baseName + createdSuffix) - addCreatedTimeSeriesIfNeeded(tsMap, createdLabels, startTimestamp, pt.Timestamp(), metric.Type().String()) + // treat count as a sample in an individual TimeSeries + count := &prompb.Sample{ + Value: float64(pt.Count()), + Timestamp: timestamp, + } + if pt.Flags().NoRecordedValue() { + count.Value = math.Float64frombits(value.StaleNaN) + } + countlabels := createLabels(baseName+countStr, baseLabels) + c.addSample(count, countlabels) + + // process each percentile/quantile + for i := 0; i < pt.QuantileValues().Len(); i++ { + qt := pt.QuantileValues().At(i) + quantile := &prompb.Sample{ + Value: qt.Value(), + Timestamp: timestamp, + } + if pt.Flags().NoRecordedValue() { + quantile.Value = math.Float64frombits(value.StaleNaN) + } + percentileStr := strconv.FormatFloat(qt.Quantile(), 'f', -1, 64) + qtlabels := createLabels(baseName, baseLabels, quantileStr, percentileStr) + c.addSample(quantile, qtlabels) + } + + startTimestamp := pt.StartTimestamp() + if settings.ExportCreatedMetric && startTimestamp != 0 { + createdLabels := createLabels(baseName+createdSuffix, baseLabels) + c.addTimeSeriesIfNeeded(createdLabels, startTimestamp, pt.Timestamp()) + } } } -// addCreatedTimeSeriesIfNeeded adds {name}_created time series with a single -// sample. If the series exists, then new samples won't be added. -func addCreatedTimeSeriesIfNeeded( - series map[string]*prompb.TimeSeries, - labels []prompb.Label, - startTimestamp pcommon.Timestamp, - timestamp pcommon.Timestamp, - metricType string, -) { - sig := timeSeriesSignature(metricType, labels) - if _, ok := series[sig]; !ok { - series[sig] = &prompb.TimeSeries{ - Labels: labels, - Samples: []prompb.Sample{ - { // convert ns to ms - Value: float64(convertTimeStamp(startTimestamp)), - Timestamp: convertTimeStamp(timestamp), - }, +// createLabels returns a copy of baseLabels, adding to it the pair model.MetricNameLabel=name. +// If extras are provided, corresponding label pairs are also added to the returned slice. +// If extras is uneven length, the last (unpaired) extra will be ignored. +func createLabels(name string, baseLabels []prompb.Label, extras ...string) []prompb.Label { + extraLabelCount := len(extras) / 2 + labels := make([]prompb.Label, len(baseLabels), len(baseLabels)+extraLabelCount+1) // +1 for name + copy(labels, baseLabels) + + n := len(extras) + n -= n % 2 + for extrasIdx := 0; extrasIdx < n; extrasIdx += 2 { + labels = append(labels, prompb.Label{Name: extras[extrasIdx], Value: extras[extrasIdx+1]}) + } + + labels = append(labels, prompb.Label{Name: model.MetricNameLabel, Value: name}) + return labels +} + +// getOrCreateTimeSeries returns the time series corresponding to the label set if existent, and false. +// Otherwise it creates a new one and returns that, and true. +func (c *prometheusConverter) getOrCreateTimeSeries(lbls []prompb.Label) (*prompb.TimeSeries, bool) { + h := timeSeriesSignature(lbls) + ts := c.unique[h] + if ts != nil { + if isSameMetric(ts, lbls) { + // We already have this metric + return ts, false + } + + // Look for a matching conflict + for _, cTS := range c.conflicts[h] { + if isSameMetric(cTS, lbls) { + // We already have this metric + return cTS, false + } + } + + // New conflict + ts = &prompb.TimeSeries{ + Labels: lbls, + } + c.conflicts[h] = append(c.conflicts[h], ts) + return ts, true + } + + // This metric is new + ts = &prompb.TimeSeries{ + Labels: lbls, + } + c.unique[h] = ts + return ts, true +} + +// addTimeSeriesIfNeeded adds a corresponding time series if it doesn't already exist. +// If the time series doesn't already exist, it gets added with startTimestamp for its value and timestamp for its timestamp, +// both converted to milliseconds. +func (c *prometheusConverter) addTimeSeriesIfNeeded(lbls []prompb.Label, startTimestamp pcommon.Timestamp, timestamp pcommon.Timestamp) { + ts, created := c.getOrCreateTimeSeries(lbls) + if created { + ts.Samples = []prompb.Sample{ + { + // convert ns to ms + Value: float64(convertTimeStamp(startTimestamp)), + Timestamp: convertTimeStamp(timestamp), }, } } } -// addResourceTargetInfo converts the resource to the target info metric -func addResourceTargetInfo(resource pcommon.Resource, settings Settings, timestamp pcommon.Timestamp, tsMap map[string]*prompb.TimeSeries) { - if settings.DisableTargetInfo { +// addResourceTargetInfo converts the resource to the target info metric. +func addResourceTargetInfo(resource pcommon.Resource, settings Settings, timestamp pcommon.Timestamp, converter *prometheusConverter) { + if settings.DisableTargetInfo || timestamp == 0 { return } - // Use resource attributes (other than those used for job+instance) as the - // metric labels for the target info metric - attributes := pcommon.NewMap() - resource.Attributes().CopyTo(attributes) - attributes.RemoveIf(func(k string, _ pcommon.Value) bool { - switch k { - case conventions.AttributeServiceName, conventions.AttributeServiceNamespace, conventions.AttributeServiceInstanceID: - // Remove resource attributes used for job + instance - return true - default: - return false + + attributes := resource.Attributes() + identifyingAttrs := []string{ + conventions.AttributeServiceNamespace, + conventions.AttributeServiceName, + conventions.AttributeServiceInstanceID, + } + nonIdentifyingAttrsCount := attributes.Len() + for _, a := range identifyingAttrs { + _, haveAttr := attributes.Get(a) + if haveAttr { + nonIdentifyingAttrsCount-- } - }) - if attributes.Len() == 0 { + } + if nonIdentifyingAttrsCount == 0 { // If we only have job + instance, then target_info isn't useful, so don't add it. return } - // create parameters for addSample + name := targetMetricName if len(settings.Namespace) > 0 { name = settings.Namespace + "_" + name } - labels := createAttributes(resource, attributes, settings.ExternalLabels, model.MetricNameLabel, name) + + labels := createAttributes(resource, attributes, settings.ExternalLabels, identifyingAttrs, false, model.MetricNameLabel, name) + haveIdentifier := false + for _, l := range labels { + if l.Name == model.JobLabel || l.Name == model.InstanceLabel { + haveIdentifier = true + break + } + } + + if !haveIdentifier { + // We need at least one identifying label to generate target_info. + return + } + sample := &prompb.Sample{ Value: float64(1), // convert ns to ms Timestamp: convertTimeStamp(timestamp), } - addSample(tsMap, sample, labels, infoType) + converter.addSample(sample, labels) } // convertTimeStamp converts OTLP timestamp in ns to timestamp in ms diff --git a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go index 14cea32c3..45f1df123 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go @@ -1,58 +1,59 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/histograms.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheusremotewrite // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite" +package prometheusremotewrite import ( "fmt" "math" "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/value" - "github.com/prometheus/prometheus/prompb" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/prompb" ) const defaultZeroThreshold = 1e-128 -func addSingleExponentialHistogramDataPoint( - metric string, - pt pmetric.ExponentialHistogramDataPoint, - resource pcommon.Resource, - settings Settings, - series map[string]*prompb.TimeSeries, -) error { - labels := createAttributes( - resource, - pt.Attributes(), - settings.ExternalLabels, - model.MetricNameLabel, - metric, - ) +func (c *prometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetric.ExponentialHistogramDataPointSlice, + resource pcommon.Resource, settings Settings, baseName string) error { + for x := 0; x < dataPoints.Len(); x++ { + pt := dataPoints.At(x) + lbls := createAttributes( + resource, + pt.Attributes(), + settings.ExternalLabels, + nil, + true, + model.MetricNameLabel, + baseName, + ) + ts, _ := c.getOrCreateTimeSeries(lbls) - sig := timeSeriesSignature( - pmetric.MetricTypeExponentialHistogram.String(), - labels, - ) - ts, ok := series[sig] - if !ok { - ts = &prompb.TimeSeries{ - Labels: labels, + histogram, err := exponentialToNativeHistogram(pt) + if err != nil { + return err } - series[sig] = ts - } + ts.Histograms = append(ts.Histograms, histogram) - histogram, err := exponentialToNativeHistogram(pt) - if err != nil { - return err + exemplars := getPromExemplars[pmetric.ExponentialHistogramDataPoint](pt) + ts.Exemplars = append(ts.Exemplars, exemplars...) } - ts.Histograms = append(ts.Histograms, histogram) - - exemplars := getPromExemplars[pmetric.ExponentialHistogramDataPoint](pt) - ts.Exemplars = append(ts.Exemplars, exemplars...) return nil } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go index fb141034a..16bdc098b 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go @@ -1,19 +1,32 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/metrics_to_prw.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheusremotewrite // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite" +package prometheusremotewrite import ( "errors" "fmt" + "sort" + "strconv" - "github.com/prometheus/prometheus/prompb" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" "go.uber.org/multierr" + "github.com/prometheus/prometheus/prompb" prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" ) @@ -27,9 +40,33 @@ type Settings struct { } // FromMetrics converts pmetric.Metrics to Prometheus remote write format. -func FromMetrics(md pmetric.Metrics, settings Settings) (tsMap map[string]*prompb.TimeSeries, errs error) { - tsMap = make(map[string]*prompb.TimeSeries) +func FromMetrics(md pmetric.Metrics, settings Settings) (map[string]*prompb.TimeSeries, error) { + c := newPrometheusConverter() + errs := c.fromMetrics(md, settings) + tss := c.timeSeries() + out := make(map[string]*prompb.TimeSeries, len(tss)) + for i := range tss { + out[strconv.Itoa(i)] = &tss[i] + } + return out, errs +} + +// prometheusConverter converts from OTel write format to Prometheus write format. +type prometheusConverter struct { + unique map[uint64]*prompb.TimeSeries + conflicts map[uint64][]*prompb.TimeSeries +} + +func newPrometheusConverter() *prometheusConverter { + return &prometheusConverter{ + unique: map[uint64]*prompb.TimeSeries{}, + conflicts: map[uint64][]*prompb.TimeSeries{}, + } +} + +// fromMetrics converts pmetric.Metrics to Prometheus remote write format. +func (c *prometheusConverter) fromMetrics(md pmetric.Metrics, settings Settings) (errs error) { resourceMetricsSlice := md.ResourceMetrics() for i := 0; i < resourceMetricsSlice.Len(); i++ { resourceMetrics := resourceMetricsSlice.At(i) @@ -39,8 +76,7 @@ func FromMetrics(md pmetric.Metrics, settings Settings) (tsMap map[string]*promp // use with the "target" info metric var mostRecentTimestamp pcommon.Timestamp for j := 0; j < scopeMetricsSlice.Len(); j++ { - scopeMetrics := scopeMetricsSlice.At(j) - metricSlice := scopeMetrics.Metrics() + metricSlice := scopeMetricsSlice.At(j).Metrics() // TODO: decide if instrumentation library information should be exported as labels for k := 0; k < metricSlice.Len(); k++ { @@ -54,65 +90,125 @@ func FromMetrics(md pmetric.Metrics, settings Settings) (tsMap map[string]*promp promName := prometheustranslator.BuildCompliantName(metric, settings.Namespace, settings.AddMetricSuffixes) - // handle individual metric based on type + // handle individual metrics based on type //exhaustive:enforce switch metric.Type() { case pmetric.MetricTypeGauge: dataPoints := metric.Gauge().DataPoints() if dataPoints.Len() == 0 { errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) + break } - for x := 0; x < dataPoints.Len(); x++ { - addSingleGaugeNumberDataPoint(dataPoints.At(x), resource, metric, settings, tsMap, promName) - } + c.addGaugeNumberDataPoints(dataPoints, resource, settings, promName) case pmetric.MetricTypeSum: dataPoints := metric.Sum().DataPoints() if dataPoints.Len() == 0 { errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) + break } - for x := 0; x < dataPoints.Len(); x++ { - addSingleSumNumberDataPoint(dataPoints.At(x), resource, metric, settings, tsMap, promName) - } + c.addSumNumberDataPoints(dataPoints, resource, metric, settings, promName) case pmetric.MetricTypeHistogram: dataPoints := metric.Histogram().DataPoints() if dataPoints.Len() == 0 { errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) + break } - for x := 0; x < dataPoints.Len(); x++ { - addSingleHistogramDataPoint(dataPoints.At(x), resource, metric, settings, tsMap, promName) - } + c.addHistogramDataPoints(dataPoints, resource, settings, promName) case pmetric.MetricTypeExponentialHistogram: dataPoints := metric.ExponentialHistogram().DataPoints() if dataPoints.Len() == 0 { errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) + break } - for x := 0; x < dataPoints.Len(); x++ { - errs = multierr.Append( - errs, - addSingleExponentialHistogramDataPoint( - promName, - dataPoints.At(x), - resource, - settings, - tsMap, - ), - ) - } + errs = multierr.Append(errs, c.addExponentialHistogramDataPoints( + dataPoints, + resource, + settings, + promName, + )) case pmetric.MetricTypeSummary: dataPoints := metric.Summary().DataPoints() if dataPoints.Len() == 0 { errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) + break } - for x := 0; x < dataPoints.Len(); x++ { - addSingleSummaryDataPoint(dataPoints.At(x), resource, metric, settings, tsMap, promName) - } + c.addSummaryDataPoints(dataPoints, resource, settings, promName) default: errs = multierr.Append(errs, errors.New("unsupported metric type")) } } } - addResourceTargetInfo(resource, settings, mostRecentTimestamp, tsMap) + addResourceTargetInfo(resource, settings, mostRecentTimestamp, c) } return } + +// timeSeries returns a slice of the prompb.TimeSeries that were converted from OTel format. +func (c *prometheusConverter) timeSeries() []prompb.TimeSeries { + conflicts := 0 + for _, ts := range c.conflicts { + conflicts += len(ts) + } + allTS := make([]prompb.TimeSeries, 0, len(c.unique)+conflicts) + for _, ts := range c.unique { + allTS = append(allTS, *ts) + } + for _, cTS := range c.conflicts { + for _, ts := range cTS { + allTS = append(allTS, *ts) + } + } + + return allTS +} + +func isSameMetric(ts *prompb.TimeSeries, lbls []prompb.Label) bool { + if len(ts.Labels) != len(lbls) { + return false + } + for i, l := range ts.Labels { + if l.Name != ts.Labels[i].Name || l.Value != ts.Labels[i].Value { + return false + } + } + return true +} + +// addExemplars adds exemplars for the dataPoint. For each exemplar, if it can find a bucket bound corresponding to its value, +// the exemplar is added to the bucket bound's time series, provided that the time series' has samples. +func (c *prometheusConverter) addExemplars(dataPoint pmetric.HistogramDataPoint, bucketBounds []bucketBoundsData) { + if len(bucketBounds) == 0 { + return + } + + exemplars := getPromExemplars(dataPoint) + if len(exemplars) == 0 { + return + } + + sort.Sort(byBucketBoundsData(bucketBounds)) + for _, exemplar := range exemplars { + for _, bound := range bucketBounds { + if len(bound.ts.Samples) > 0 && exemplar.Value <= bound.bound { + bound.ts.Exemplars = append(bound.ts.Exemplars, exemplar) + break + } + } + } +} + +// addSample finds a TimeSeries that corresponds to lbls, and adds sample to it. +// If there is no corresponding TimeSeries already, it's created. +// The corresponding TimeSeries is returned. +// If either lbls is nil/empty or sample is nil, nothing is done. +func (c *prometheusConverter) addSample(sample *prompb.Sample, lbls []prompb.Label) *prompb.TimeSeries { + if sample == nil || len(lbls) == 0 { + // This shouldn't happen + return nil + } + + ts, _ := c.getOrCreateTimeSeries(lbls) + ts.Samples = append(ts.Samples, *sample) + return ts +} diff --git a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go index b5bd8765f..75c3d9845 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go @@ -1,106 +1,110 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/number_data_points.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheusremotewrite // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite" +package prometheusremotewrite import ( "math" "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/value" - "github.com/prometheus/prometheus/prompb" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/prompb" ) -// addSingleGaugeNumberDataPoint converts the Gauge metric data point to a -// Prometheus time series with samples and labels. The result is stored in the -// series map. -func addSingleGaugeNumberDataPoint( - pt pmetric.NumberDataPoint, - resource pcommon.Resource, - metric pmetric.Metric, - settings Settings, - series map[string]*prompb.TimeSeries, - name string, -) { - labels := createAttributes( - resource, - pt.Attributes(), - settings.ExternalLabels, - model.MetricNameLabel, - name, - ) - sample := &prompb.Sample{ - // convert ns to ms - Timestamp: convertTimeStamp(pt.Timestamp()), +func (c *prometheusConverter) addGaugeNumberDataPoints(dataPoints pmetric.NumberDataPointSlice, + resource pcommon.Resource, settings Settings, name string) { + for x := 0; x < dataPoints.Len(); x++ { + pt := dataPoints.At(x) + labels := createAttributes( + resource, + pt.Attributes(), + settings.ExternalLabels, + nil, + true, + model.MetricNameLabel, + name, + ) + sample := &prompb.Sample{ + // convert ns to ms + Timestamp: convertTimeStamp(pt.Timestamp()), + } + switch pt.ValueType() { + case pmetric.NumberDataPointValueTypeInt: + sample.Value = float64(pt.IntValue()) + case pmetric.NumberDataPointValueTypeDouble: + sample.Value = pt.DoubleValue() + } + if pt.Flags().NoRecordedValue() { + sample.Value = math.Float64frombits(value.StaleNaN) + } + c.addSample(sample, labels) } - switch pt.ValueType() { - case pmetric.NumberDataPointValueTypeInt: - sample.Value = float64(pt.IntValue()) - case pmetric.NumberDataPointValueTypeDouble: - sample.Value = pt.DoubleValue() - } - if pt.Flags().NoRecordedValue() { - sample.Value = math.Float64frombits(value.StaleNaN) - } - addSample(series, sample, labels, metric.Type().String()) } -// addSingleSumNumberDataPoint converts the Sum metric data point to a Prometheus -// time series with samples, labels and exemplars. The result is stored in the -// series map. -func addSingleSumNumberDataPoint( - pt pmetric.NumberDataPoint, - resource pcommon.Resource, - metric pmetric.Metric, - settings Settings, - series map[string]*prompb.TimeSeries, - name string, -) { - labels := createAttributes( - resource, - pt.Attributes(), - settings.ExternalLabels, - model.MetricNameLabel, name, - ) - sample := &prompb.Sample{ - // convert ns to ms - Timestamp: convertTimeStamp(pt.Timestamp()), - } - switch pt.ValueType() { - case pmetric.NumberDataPointValueTypeInt: - sample.Value = float64(pt.IntValue()) - case pmetric.NumberDataPointValueTypeDouble: - sample.Value = pt.DoubleValue() - } - if pt.Flags().NoRecordedValue() { - sample.Value = math.Float64frombits(value.StaleNaN) - } - sig := addSample(series, sample, labels, metric.Type().String()) - - if ts := series[sig]; sig != "" && ts != nil { - exemplars := getPromExemplars[pmetric.NumberDataPoint](pt) - ts.Exemplars = append(ts.Exemplars, exemplars...) - } - - // add _created time series if needed - if settings.ExportCreatedMetric && metric.Sum().IsMonotonic() { - startTimestamp := pt.StartTimestamp() - if startTimestamp == 0 { - return +func (c *prometheusConverter) addSumNumberDataPoints(dataPoints pmetric.NumberDataPointSlice, + resource pcommon.Resource, metric pmetric.Metric, settings Settings, name string) { + for x := 0; x < dataPoints.Len(); x++ { + pt := dataPoints.At(x) + lbls := createAttributes( + resource, + pt.Attributes(), + settings.ExternalLabels, + nil, + true, + model.MetricNameLabel, + name, + ) + sample := &prompb.Sample{ + // convert ns to ms + Timestamp: convertTimeStamp(pt.Timestamp()), + } + switch pt.ValueType() { + case pmetric.NumberDataPointValueTypeInt: + sample.Value = float64(pt.IntValue()) + case pmetric.NumberDataPointValueTypeDouble: + sample.Value = pt.DoubleValue() + } + if pt.Flags().NoRecordedValue() { + sample.Value = math.Float64frombits(value.StaleNaN) + } + ts := c.addSample(sample, lbls) + if ts != nil { + exemplars := getPromExemplars[pmetric.NumberDataPoint](pt) + ts.Exemplars = append(ts.Exemplars, exemplars...) } - createdLabels := make([]prompb.Label, len(labels)) - copy(createdLabels, labels) - for i, l := range createdLabels { - if l.Name == model.MetricNameLabel { - createdLabels[i].Value = name + createdSuffix - break + // add created time series if needed + if settings.ExportCreatedMetric && metric.Sum().IsMonotonic() { + startTimestamp := pt.StartTimestamp() + if startTimestamp == 0 { + return } + + createdLabels := make([]prompb.Label, len(lbls)) + copy(createdLabels, lbls) + for i, l := range createdLabels { + if l.Name == model.MetricNameLabel { + createdLabels[i].Value = name + createdSuffix + break + } + } + c.addTimeSeriesIfNeeded(createdLabels, startTimestamp, pt.Timestamp()) } - addCreatedTimeSeriesIfNeeded(series, createdLabels, startTimestamp, pt.Timestamp(), metric.Type().String()) } } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go b/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go index e43797212..ba4870419 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go @@ -1,14 +1,25 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/otlp_to_openmetrics_metadata.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheusremotewrite // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite" +package prometheusremotewrite import ( - "github.com/prometheus/prometheus/prompb" "go.opentelemetry.io/collector/pdata/pmetric" + "github.com/prometheus/prometheus/prompb" prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" ) diff --git a/storage/remote/otlptranslator/update-copy.sh b/storage/remote/otlptranslator/update-copy.sh deleted file mode 100755 index 8aa645e0b..000000000 --- a/storage/remote/otlptranslator/update-copy.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -set -xe - -OTEL_VERSION=v0.95.0 - -git clone https://github.com/open-telemetry/opentelemetry-collector-contrib ./tmp -cd ./tmp -git checkout $OTEL_VERSION -cd .. - -rm -rf ./prometheusremotewrite/* -cp -r ./tmp/pkg/translator/prometheusremotewrite/*.go ./prometheusremotewrite -rm -rf ./prometheusremotewrite/*_test.go - -rm -rf ./prometheus/* -cp -r ./tmp/pkg/translator/prometheus/*.go ./prometheus -rm -rf ./prometheus/*_test.go - -rm -rf ./tmp - -case $(sed --help 2>&1) in - *GNU*) set sed -i;; - *) set sed -i '';; -esac - -"$@" -e 's#github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/prometheus#github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus#g' ./prometheusremotewrite/*.go ./prometheus/*.go -"$@" -e '1s#^#// DO NOT EDIT. COPIED AS-IS. SEE ../README.md\n\n#g' ./prometheusremotewrite/*.go ./prometheus/*.go From 99f3051f45ef43735034802e2b56673fe40b2276 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Tue, 30 Apr 2024 11:37:00 +0200 Subject: [PATCH 26/44] OTLP: Use PrometheusConverter directly Signed-off-by: Arve Knudsen --- CHANGELOG.md | 1 + .../prometheusremotewrite/helper.go | 10 +++--- .../prometheusremotewrite/histograms.go | 2 +- .../prometheusremotewrite/metrics_to_prw.go | 34 ++++++------------- .../number_data_points.go | 4 +-- storage/remote/write_handler.go | 16 +++------ 6 files changed, 24 insertions(+), 43 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 23d2c89da..5cc5b8f40 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ * [CHANGE] TSDB: Fix the predicate checking for blocks which are beyond the retention period to include the ones right at the retention boundary. #9633 * [ENHANCEMENT] Rules: Add `rule_group_last_restore_duration_seconds` to measure the time it takes to restore a rule group. #13974 +* [ENHANCEMENT] OTLP: Improve remote write format translation performance by using label set hashes for metric identifiers instead of string based ones. #14006 #13991 ## 2.51.2 / 2024-04-09 diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go index de228e807..68be82e44 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -219,7 +219,7 @@ func isValidAggregationTemporality(metric pmetric.Metric) bool { return false } -func (c *prometheusConverter) addHistogramDataPoints(dataPoints pmetric.HistogramDataPointSlice, +func (c *PrometheusConverter) addHistogramDataPoints(dataPoints pmetric.HistogramDataPointSlice, resource pcommon.Resource, settings Settings, baseName string) { for x := 0; x < dataPoints.Len(); x++ { pt := dataPoints.At(x) @@ -395,7 +395,7 @@ func mostRecentTimestampInMetric(metric pmetric.Metric) pcommon.Timestamp { return ts } -func (c *prometheusConverter) addSummaryDataPoints(dataPoints pmetric.SummaryDataPointSlice, resource pcommon.Resource, +func (c *PrometheusConverter) addSummaryDataPoints(dataPoints pmetric.SummaryDataPointSlice, resource pcommon.Resource, settings Settings, baseName string) { for x := 0; x < dataPoints.Len(); x++ { pt := dataPoints.At(x) @@ -468,7 +468,7 @@ func createLabels(name string, baseLabels []prompb.Label, extras ...string) []pr // getOrCreateTimeSeries returns the time series corresponding to the label set if existent, and false. // Otherwise it creates a new one and returns that, and true. -func (c *prometheusConverter) getOrCreateTimeSeries(lbls []prompb.Label) (*prompb.TimeSeries, bool) { +func (c *PrometheusConverter) getOrCreateTimeSeries(lbls []prompb.Label) (*prompb.TimeSeries, bool) { h := timeSeriesSignature(lbls) ts := c.unique[h] if ts != nil { @@ -504,7 +504,7 @@ func (c *prometheusConverter) getOrCreateTimeSeries(lbls []prompb.Label) (*promp // addTimeSeriesIfNeeded adds a corresponding time series if it doesn't already exist. // If the time series doesn't already exist, it gets added with startTimestamp for its value and timestamp for its timestamp, // both converted to milliseconds. -func (c *prometheusConverter) addTimeSeriesIfNeeded(lbls []prompb.Label, startTimestamp pcommon.Timestamp, timestamp pcommon.Timestamp) { +func (c *PrometheusConverter) addTimeSeriesIfNeeded(lbls []prompb.Label, startTimestamp pcommon.Timestamp, timestamp pcommon.Timestamp) { ts, created := c.getOrCreateTimeSeries(lbls) if created { ts.Samples = []prompb.Sample{ @@ -518,7 +518,7 @@ func (c *prometheusConverter) addTimeSeriesIfNeeded(lbls []prompb.Label, startTi } // addResourceTargetInfo converts the resource to the target info metric. -func addResourceTargetInfo(resource pcommon.Resource, settings Settings, timestamp pcommon.Timestamp, converter *prometheusConverter) { +func addResourceTargetInfo(resource pcommon.Resource, settings Settings, timestamp pcommon.Timestamp, converter *PrometheusConverter) { if settings.DisableTargetInfo || timestamp == 0 { return } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go index 45f1df123..31d343fe4 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go @@ -30,7 +30,7 @@ import ( const defaultZeroThreshold = 1e-128 -func (c *prometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetric.ExponentialHistogramDataPointSlice, +func (c *PrometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetric.ExponentialHistogramDataPointSlice, resource pcommon.Resource, settings Settings, baseName string) error { for x := 0; x < dataPoints.Len(); x++ { pt := dataPoints.At(x) diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go index a108306ba..2d6aa30a1 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go @@ -20,7 +20,6 @@ import ( "errors" "fmt" "sort" - "strconv" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" @@ -39,34 +38,21 @@ type Settings struct { SendMetadata bool } -// FromMetrics converts pmetric.Metrics to Prometheus remote write format. -func FromMetrics(md pmetric.Metrics, settings Settings) (map[string]*prompb.TimeSeries, error) { - c := newPrometheusConverter() - errs := c.fromMetrics(md, settings) - tss := c.timeSeries() - out := make(map[string]*prompb.TimeSeries, len(tss)) - for i := range tss { - out[strconv.Itoa(i)] = &tss[i] - } - - return out, errs -} - -// prometheusConverter converts from OTel write format to Prometheus write format. -type prometheusConverter struct { +// PrometheusConverter converts from OTel write format to Prometheus write format. +type PrometheusConverter struct { unique map[uint64]*prompb.TimeSeries conflicts map[uint64][]*prompb.TimeSeries } -func newPrometheusConverter() *prometheusConverter { - return &prometheusConverter{ +func NewPrometheusConverter() *PrometheusConverter { + return &PrometheusConverter{ unique: map[uint64]*prompb.TimeSeries{}, conflicts: map[uint64][]*prompb.TimeSeries{}, } } -// fromMetrics converts pmetric.Metrics to Prometheus remote write format. -func (c *prometheusConverter) fromMetrics(md pmetric.Metrics, settings Settings) (errs error) { +// FromMetrics converts pmetric.Metrics to Prometheus remote write format. +func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) (errs error) { resourceMetricsSlice := md.ResourceMetrics() for i := 0; i < resourceMetricsSlice.Len(); i++ { resourceMetrics := resourceMetricsSlice.At(i) @@ -144,8 +130,8 @@ func (c *prometheusConverter) fromMetrics(md pmetric.Metrics, settings Settings) return } -// timeSeries returns a slice of the prompb.TimeSeries that were converted from OTel format. -func (c *prometheusConverter) timeSeries() []prompb.TimeSeries { +// TimeSeries returns a slice of the prompb.TimeSeries that were converted from OTel format. +func (c *PrometheusConverter) TimeSeries() []prompb.TimeSeries { conflicts := 0 for _, ts := range c.conflicts { conflicts += len(ts) @@ -177,7 +163,7 @@ func isSameMetric(ts *prompb.TimeSeries, lbls []prompb.Label) bool { // addExemplars adds exemplars for the dataPoint. For each exemplar, if it can find a bucket bound corresponding to its value, // the exemplar is added to the bucket bound's time series, provided that the time series' has samples. -func (c *prometheusConverter) addExemplars(dataPoint pmetric.HistogramDataPoint, bucketBounds []bucketBoundsData) { +func (c *PrometheusConverter) addExemplars(dataPoint pmetric.HistogramDataPoint, bucketBounds []bucketBoundsData) { if len(bucketBounds) == 0 { return } @@ -202,7 +188,7 @@ func (c *prometheusConverter) addExemplars(dataPoint pmetric.HistogramDataPoint, // If there is no corresponding TimeSeries already, it's created. // The corresponding TimeSeries is returned. // If either lbls is nil/empty or sample is nil, nothing is done. -func (c *prometheusConverter) addSample(sample *prompb.Sample, lbls []prompb.Label) *prompb.TimeSeries { +func (c *PrometheusConverter) addSample(sample *prompb.Sample, lbls []prompb.Label) *prompb.TimeSeries { if sample == nil || len(lbls) == 0 { // This shouldn't happen return nil diff --git a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go index 75c3d9845..aafebc6c4 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go @@ -27,7 +27,7 @@ import ( "github.com/prometheus/prometheus/prompb" ) -func (c *prometheusConverter) addGaugeNumberDataPoints(dataPoints pmetric.NumberDataPointSlice, +func (c *PrometheusConverter) addGaugeNumberDataPoints(dataPoints pmetric.NumberDataPointSlice, resource pcommon.Resource, settings Settings, name string) { for x := 0; x < dataPoints.Len(); x++ { pt := dataPoints.At(x) @@ -57,7 +57,7 @@ func (c *prometheusConverter) addGaugeNumberDataPoints(dataPoints pmetric.Number } } -func (c *prometheusConverter) addSumNumberDataPoints(dataPoints pmetric.NumberDataPointSlice, +func (c *PrometheusConverter) addSumNumberDataPoints(dataPoints pmetric.NumberDataPointSlice, resource pcommon.Resource, metric pmetric.Metric, settings Settings, name string) { for x := 0; x < dataPoints.Len(); x++ { pt := dataPoints.At(x) diff --git a/storage/remote/write_handler.go b/storage/remote/write_handler.go index bb6b8423a..ff227292b 100644 --- a/storage/remote/write_handler.go +++ b/storage/remote/write_handler.go @@ -208,21 +208,15 @@ func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { return } - prwMetricsMap, errs := otlptranslator.FromMetrics(req.Metrics(), otlptranslator.Settings{ + converter := otlptranslator.NewPrometheusConverter() + if err := converter.FromMetrics(req.Metrics(), otlptranslator.Settings{ AddMetricSuffixes: true, - }) - if errs != nil { - level.Warn(h.logger).Log("msg", "Error translating OTLP metrics to Prometheus write request", "err", errs) - } - - prwMetrics := make([]prompb.TimeSeries, 0, len(prwMetricsMap)) - - for _, ts := range prwMetricsMap { - prwMetrics = append(prwMetrics, *ts) + }); err != nil { + level.Warn(h.logger).Log("msg", "Error translating OTLP metrics to Prometheus write request", "err", err) } err = h.rwHandler.write(r.Context(), &prompb.WriteRequest{ - Timeseries: prwMetrics, + Timeseries: converter.TimeSeries(), }) switch { From 918950756988fde81d5315eeed7c628e963cde76 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Tue, 30 Apr 2024 11:56:35 +0200 Subject: [PATCH 27/44] prometheusremotewrite: Add PrometheusConverter.FromMetrics benchmark Signed-off-by: Arve Knudsen --- .../metrics_to_prw_test.go | 134 ++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go new file mode 100644 index 000000000..4797daeec --- /dev/null +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go @@ -0,0 +1,134 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/metrics_to_prw_test.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + +package prometheusremotewrite + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + "go.opentelemetry.io/collector/pdata/pmetric/pmetricotlp" +) + +func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) { + for _, resourceAttributeCount := range []int{0, 5, 50} { + b.Run(fmt.Sprintf("resource attribute count: %v", resourceAttributeCount), func(b *testing.B) { + for _, histogramCount := range []int{0, 1000} { + b.Run(fmt.Sprintf("histogram count: %v", histogramCount), func(b *testing.B) { + nonHistogramCounts := []int{0, 1000} + + if resourceAttributeCount == 0 && histogramCount == 0 { + // Don't bother running a scenario where we'll generate no series. + nonHistogramCounts = []int{1000} + } + + for _, nonHistogramCount := range nonHistogramCounts { + b.Run(fmt.Sprintf("non-histogram count: %v", nonHistogramCount), func(b *testing.B) { + for _, labelsPerMetric := range []int{2, 20} { + b.Run(fmt.Sprintf("labels per metric: %v", labelsPerMetric), func(b *testing.B) { + for _, exemplarsPerSeries := range []int{0, 5, 10} { + b.Run(fmt.Sprintf("exemplars per series: %v", exemplarsPerSeries), func(b *testing.B) { + payload := createExportRequest(resourceAttributeCount, histogramCount, nonHistogramCount, labelsPerMetric, exemplarsPerSeries) + + for i := 0; i < b.N; i++ { + converter := NewPrometheusConverter() + require.NoError(b, converter.FromMetrics(payload.Metrics(), Settings{})) + require.NotNil(b, converter.TimeSeries()) + } + }) + } + }) + } + }) + } + }) + } + }) + } +} + +func createExportRequest(resourceAttributeCount int, histogramCount int, nonHistogramCount int, labelsPerMetric int, exemplarsPerSeries int) pmetricotlp.ExportRequest { + request := pmetricotlp.NewExportRequest() + + rm := request.Metrics().ResourceMetrics().AppendEmpty() + generateAttributes(rm.Resource().Attributes(), "resource", resourceAttributeCount) + + metrics := rm.ScopeMetrics().AppendEmpty().Metrics() + ts := pcommon.NewTimestampFromTime(time.Now()) + + for i := 1; i <= histogramCount; i++ { + m := metrics.AppendEmpty() + m.SetEmptyHistogram() + m.SetName(fmt.Sprintf("histogram-%v", i)) + m.Histogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + h := m.Histogram().DataPoints().AppendEmpty() + h.SetTimestamp(ts) + + // Set 50 samples, 10 each with values 0.5, 1, 2, 4, and 8 + h.SetCount(50) + h.SetSum(155) + h.BucketCounts().FromRaw([]uint64{10, 10, 10, 10, 10, 0}) + h.ExplicitBounds().FromRaw([]float64{.5, 1, 2, 4, 8, 16}) // Bucket boundaries include the upper limit (ie. each sample is on the upper limit of its bucket) + + generateAttributes(h.Attributes(), "series", labelsPerMetric) + generateExemplars(h.Exemplars(), exemplarsPerSeries, ts) + } + + for i := 1; i <= nonHistogramCount; i++ { + m := metrics.AppendEmpty() + m.SetEmptySum() + m.SetName(fmt.Sprintf("sum-%v", i)) + m.Sum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + point := m.Sum().DataPoints().AppendEmpty() + point.SetTimestamp(ts) + point.SetDoubleValue(1.23) + generateAttributes(point.Attributes(), "series", labelsPerMetric) + generateExemplars(point.Exemplars(), exemplarsPerSeries, ts) + } + + for i := 1; i <= nonHistogramCount; i++ { + m := metrics.AppendEmpty() + m.SetEmptyGauge() + m.SetName(fmt.Sprintf("gauge-%v", i)) + point := m.Gauge().DataPoints().AppendEmpty() + point.SetTimestamp(ts) + point.SetDoubleValue(1.23) + generateAttributes(point.Attributes(), "series", labelsPerMetric) + generateExemplars(point.Exemplars(), exemplarsPerSeries, ts) + } + + return request +} + +func generateAttributes(m pcommon.Map, prefix string, count int) { + for i := 1; i <= count; i++ { + m.PutStr(fmt.Sprintf("%v-name-%v", prefix, i), fmt.Sprintf("value-%v", i)) + } +} + +func generateExemplars(exemplars pmetric.ExemplarSlice, count int, ts pcommon.Timestamp) { + for i := 1; i <= count; i++ { + e := exemplars.AppendEmpty() + e.SetTimestamp(ts) + e.SetDoubleValue(2.22) + e.SetSpanID(pcommon.SpanID{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}) + e.SetTraceID(pcommon.TraceID{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f}) + } +} From 151f6e0ed6e9e60c0c667da294c099bc3ece8047 Mon Sep 17 00:00:00 2001 From: gotjosh Date: Tue, 30 Apr 2024 12:17:56 +0100 Subject: [PATCH 28/44] Add an assertion on the count of alerts before adding an active alert Signed-off-by: gotjosh --- rules/alerting_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rules/alerting_test.go b/rules/alerting_test.go index a3f97b2a4..28b4f2cdc 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -1038,6 +1038,8 @@ func TestAlertingRule_ActiveAlertsCount(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil, ) + require.Equal(t, 0, rule.ActiveAlertsCount()) + // Set an active alert. lbls := labels.FromStrings("a1", "1") h := lbls.Hash() From ccfafae36d4a0d49a4b099fafbe56eb66ffcb6bb Mon Sep 17 00:00:00 2001 From: gotjosh Date: Tue, 30 Apr 2024 12:19:18 +0100 Subject: [PATCH 29/44] Rename QueryforStateSeries to QueryForStateSeries Signed-off-by: gotjosh --- rules/alerting.go | 4 ++-- rules/alerting_test.go | 2 +- rules/group.go | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rules/alerting.go b/rules/alerting.go index e357cc615..fb0fb35b1 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -268,8 +268,8 @@ func (r *AlertingRule) forStateSample(alert *Alert, ts time.Time, v float64) pro return s } -// QueryforStateSeries returns the series for ALERTS_FOR_STATE of the alert rule. -func (r *AlertingRule) QueryforStateSeries(ctx context.Context, q storage.Querier) (storage.SeriesSet, error) { +// QueryForStateSeries returns the series for ALERTS_FOR_STATE of the alert rule. +func (r *AlertingRule) QueryForStateSeries(ctx context.Context, q storage.Querier) (storage.SeriesSet, error) { // We use a sample to ease the building of matchers. // Don't provide an alert as we want matchers that match all series for the alert rule. smpl := r.forStateSample(nil, time.Now(), 0) diff --git a/rules/alerting_test.go b/rules/alerting_test.go index 28b4f2cdc..165f38b86 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -716,7 +716,7 @@ func TestQueryForStateSeries(t *testing.T) { matchersCount++ }) - seriesSet, err := rule.QueryforStateSeries(context.Background(), querier) + seriesSet, err := rule.QueryForStateSeries(context.Background(), querier) var series storage.Series for seriesSet.Next() { diff --git a/rules/group.go b/rules/group.go index 28a0ff6e1..e8a7d82d2 100644 --- a/rules/group.go +++ b/rules/group.go @@ -664,7 +664,7 @@ func (g *Group) RestoreForState(ts time.Time) { continue } - sset, err := alertRule.QueryforStateSeries(g.opts.Context, q) + sset, err := alertRule.QueryForStateSeries(g.opts.Context, q) if err != nil { level.Error(g.logger).Log( "msg", "Failed to restore 'for' state", From 63b09944b8502a64cb17330161c8520d24874aa4 Mon Sep 17 00:00:00 2001 From: gotjosh Date: Tue, 30 Apr 2024 12:25:48 +0100 Subject: [PATCH 30/44] Use labels.Len() instead of manually counting the labels Signed-off-by: gotjosh --- rules/alerting_test.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/rules/alerting_test.go b/rules/alerting_test.go index 165f38b86..5fae3edd1 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -711,16 +711,12 @@ func TestQueryForStateSeries(t *testing.T) { ) sample := rule.forStateSample(nil, time.Time{}, 0) - var matchersCount int - sample.Metric.Range(func(l labels.Label) { - matchersCount++ - }) seriesSet, err := rule.QueryForStateSeries(context.Background(), querier) var series storage.Series for seriesSet.Next() { - if seriesSet.At().Labels().Len() == matchersCount { + if seriesSet.At().Labels().Len() == sample.Metric.Len() { series = seriesSet.At() break } From f63dbc3db2abee643c7e182690be5712c42f13ca Mon Sep 17 00:00:00 2001 From: gotjosh Date: Tue, 30 Apr 2024 12:39:07 +0100 Subject: [PATCH 31/44] Remove duplicated sorted and assignment of expected alerts. Signed-off-by: gotjosh --- rules/manager_test.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/rules/manager_test.go b/rules/manager_test.go index a3bd335d1..d74a7dfe1 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -397,14 +397,6 @@ func TestForStateRestore(t *testing.T) { group.Eval(context.TODO(), evalTime) } - exp := rule.ActiveAlerts() - for _, aa := range exp { - require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) - } - sort.Slice(exp, func(i, j int) bool { - return labels.Compare(exp[i].Labels, exp[j].Labels) < 0 - }) - // Prometheus goes down here. We create new rules and groups. type testInput struct { name string From 05ca082b079bb12dc7ad0f8c3940774df803401b Mon Sep 17 00:00:00 2001 From: gotjosh Date: Tue, 30 Apr 2024 12:43:09 +0100 Subject: [PATCH 32/44] Rename `alerts` to `expectedAlerts` in the test case input Signed-off-by: gotjosh --- rules/manager_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rules/manager_test.go b/rules/manager_test.go index d74a7dfe1..07159145f 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -401,7 +401,7 @@ func TestForStateRestore(t *testing.T) { type testInput struct { name string restoreDuration time.Duration - alerts []*Alert + expectedAlerts []*Alert num int noRestore bool @@ -414,7 +414,7 @@ func TestForStateRestore(t *testing.T) { { name: "normal restore (alerts were not firing)", restoreDuration: 15 * time.Minute, - alerts: rule.ActiveAlerts(), + expectedAlerts: rule.ActiveAlerts(), downDuration: 10 * time.Minute, }, { @@ -426,12 +426,12 @@ func TestForStateRestore(t *testing.T) { { name: "no active alerts", restoreDuration: 50 * time.Minute, - alerts: []*Alert{}, + expectedAlerts: []*Alert{}, }, { name: "test the grace period", restoreDuration: 25 * time.Minute, - alerts: []*Alert{}, + expectedAlerts: []*Alert{}, gracePeriod: true, before: func() { for _, duration := range []time.Duration{10 * time.Minute, 15 * time.Minute, 20 * time.Minute} { @@ -496,7 +496,7 @@ func TestForStateRestore(t *testing.T) { require.Equal(t, opts.ForGracePeriod, e.ActiveAt.Add(alertForDuration).Sub(restoreTime)) } default: - exp := tt.alerts + exp := tt.expectedAlerts require.Equal(t, len(exp), len(got)) sortAlerts(exp) sortAlerts(got) From 379dec9d368edbf083b5a7da9e8aa02e94bb079b Mon Sep 17 00:00:00 2001 From: gotjosh Date: Tue, 30 Apr 2024 13:09:30 +0100 Subject: [PATCH 33/44] querier.Select cannot return a nil series set. Signed-off-by: gotjosh --- rules/group.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rules/group.go b/rules/group.go index e8a7d82d2..3a97c9564 100644 --- a/rules/group.go +++ b/rules/group.go @@ -675,18 +675,18 @@ func (g *Group) RestoreForState(ts time.Time) { continue } - // No results for this alert rule. - if sset == nil { - level.Debug(g.logger).Log("msg", "Failed to find a series to restore the 'for' state", labels.AlertName, alertRule.Name()) - continue - } - // While not technically the same number of series we expect, it's as good of an approximation as any. seriesByLabels := make(map[string]storage.Series, alertRule.ActiveAlertsCount()) for sset.Next() { seriesByLabels[sset.At().Labels().DropMetricName().String()] = sset.At() } + // No results for this alert rule. + if len(seriesByLabels) == 0 { + level.Debug(g.logger).Log("msg", "Failed to find a series to restore the 'for' state", labels.AlertName, alertRule.Name()) + continue + } + alertRule.ForEachActiveAlert(func(a *Alert) { var s storage.Series From 965f507db506f90713aebfd8da46e27feb018ab9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Tue, 30 Apr 2024 16:22:16 +0200 Subject: [PATCH 34/44] ci: check generated parser code before running unit tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check that the generated parser code is consistent with the input definition. Remove the file before re-generating to make sure that missing goyacc is not effecting the check. Fixes: #7488 Signed-off-by: György Krajcsovits --- .github/workflows/ci.yml | 13 +++++++++++++ Makefile | 27 +++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 714faf167..41043f6c6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -143,6 +143,19 @@ jobs: with: parallelism: 12 thread: ${{ matrix.thread }} + check_generated_parser: + name: Check generated parser + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - name: Install Go + uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 + with: + cache: false + go-version: 1.22.x + - name: Run goyacc and check for diff + run: make install-goyacc check-generated-parser golangci: name: golangci-lint runs-on: ubuntu-latest diff --git a/Makefile b/Makefile index 61e8f4377..5dcebfd1a 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,7 @@ TSDB_BENCHMARK_DATASET ?= ./tsdb/testdata/20kseries.json TSDB_BENCHMARK_OUTPUT_DIR ?= ./benchout GOLANGCI_LINT_OPTS ?= --timeout 4m +GOYACC_VERSION ?= v0.6.0 include Makefile.common @@ -78,24 +79,42 @@ assets-tarball: assets @echo '>> packaging assets' scripts/package_assets.sh -# We only want to generate the parser when there's changes to the grammar. .PHONY: parser parser: @echo ">> running goyacc to generate the .go file." ifeq (, $(shell command -v goyacc 2> /dev/null)) @echo "goyacc not installed so skipping" - @echo "To install: go install golang.org/x/tools/cmd/goyacc@v0.6.0" + @echo "To install: \"go install golang.org/x/tools/cmd/goyacc@$(GOYACC_VERSION)\" or run \"make install-goyacc\"" else - goyacc -l -o promql/parser/generated_parser.y.go promql/parser/generated_parser.y + $(MAKE) promql/parser/generated_parser.y.go endif +promql/parser/generated_parser.y.go: promql/parser/generated_parser.y + @echo ">> running goyacc to generate the .go file." + @goyacc -l -o promql/parser/generated_parser.y.go promql/parser/generated_parser.y + +.PHONY: clean-parser +clean-parser: + @echo ">> cleaning generated parser" + @rm -f promql/parser/generated_parser.y.go + +.PHONY: check-generated-parser +check-generated-parser: clean-parser promql/parser/generated_parser.y.go + @echo ">> checking generated parser" + @git diff --exit-code -- promql/parser/generated_parser.y.go || (echo "Generated parser is out of date. Please run 'make parser' and commit the changes." && false) + +.PHONY: install-goyacc +install-goyacc: + @echo ">> installing goyacc $(GOYACC_VERSION)" + @go install golang.org/x/tools/cmd/goyacc@$(GOYACC_VERSION) + .PHONY: test # If we only want to only test go code we have to change the test target # which is called by all. ifeq ($(GO_ONLY),1) test: common-test check-go-mod-version else -test: common-test ui-build-module ui-test ui-lint check-go-mod-version +test: check-generated-parser common-test ui-build-module ui-test ui-lint check-go-mod-version endif .PHONY: npm_licenses From 12e317786b7ac864117f4be1a88a1aa29e5dcf9e Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Tue, 30 Apr 2024 18:44:27 +0200 Subject: [PATCH 35/44] Add missing OTLP fixes to changelog (#14014) Signed-off-by: Arve Knudsen --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a4d478ca..ea5fc9127 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ * [CHANGE] Rules: Execute 1 query instead of N (where N is the number of alerts within alert rule) when restoring alerts. #13980 * [ENHANCEMENT] Rules: Add `rule_group_last_restore_duration_seconds` to measure the time it takes to restore a rule group. #13974 * [ENHANCEMENT] OTLP: Improve remote write format translation performance by using label set hashes for metric identifiers instead of string based ones. #14006 #13991 +* [BUGFIX] OTLP: Don't generate target_info unless at least one identifying label is defined. #13991 +* [BUGFIX] OTLP: Don't generate target_info unless there are metrics. #13991 ## 2.51.2 / 2024-04-09 From f09cf2d9fb54b3b2ae407e58d18588149db3bff8 Mon Sep 17 00:00:00 2001 From: SuperQ Date: Thu, 2 May 2024 00:10:02 +0200 Subject: [PATCH 36/44] Update promu Update promu to the latest release. Signed-off-by: SuperQ --- Makefile.common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.common b/Makefile.common index 0acfb9d80..0e9ace29b 100644 --- a/Makefile.common +++ b/Makefile.common @@ -55,7 +55,7 @@ ifneq ($(shell command -v gotestsum 2> /dev/null),) endif endif -PROMU_VERSION ?= 0.15.0 +PROMU_VERSION ?= 0.17.0 PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_VERSION)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM).tar.gz SKIP_GOLANGCI_LINT := From ff1bcdb7b9b46a5ce06362233bf3929683c36038 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 May 2024 23:03:44 +0000 Subject: [PATCH 37/44] build(deps): bump golangci/golangci-lint-action in /scripts Bumps [golangci/golangci-lint-action](https://github.com/golangci/golangci-lint-action) from 4.0.0 to 5.1.0. - [Release notes](https://github.com/golangci/golangci-lint-action/releases) - [Commits](https://github.com/golangci/golangci-lint-action/compare/3cfe3a4abbb849e10058ce4af15d205b6da42804...9d1e0624a798bb64f6c3cea93db47765312263dc) --- updated-dependencies: - dependency-name: golangci/golangci-lint-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- scripts/golangci-lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/golangci-lint.yml b/scripts/golangci-lint.yml index a7a40c1be..b36f71c3c 100644 --- a/scripts/golangci-lint.yml +++ b/scripts/golangci-lint.yml @@ -33,6 +33,6 @@ jobs: run: sudo apt-get update && sudo apt-get -y install libsnmp-dev if: github.repository == 'prometheus/snmp_exporter' - name: Lint - uses: golangci/golangci-lint-action@3cfe3a4abbb849e10058ce4af15d205b6da42804 # v4.0.0 + uses: golangci/golangci-lint-action@9d1e0624a798bb64f6c3cea93db47765312263dc # v5.1.0 with: version: v1.56.2 From 1821803720418622311f382875d1cc9080dc2879 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 May 2024 23:05:44 +0000 Subject: [PATCH 38/44] build(deps): bump actions/checkout from 4.1.2 to 4.1.4 Bumps [actions/checkout](https://github.com/actions/checkout) from 4.1.2 to 4.1.4. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/9bb56186c3b09b4f86b1c65136769dd318469633...0ad4b8fadaa221de15dcec353f45205ec38ea70b) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/buf-lint.yml | 2 +- .github/workflows/buf.yml | 2 +- .github/workflows/ci.yml | 26 ++++++++++----------- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/container_description.yml | 4 ++-- .github/workflows/repo_sync.yml | 2 +- .github/workflows/scorecards.yml | 2 +- 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/.github/workflows/buf-lint.yml b/.github/workflows/buf-lint.yml index 806a706e1..fe8c4704b 100644 --- a/.github/workflows/buf-lint.yml +++ b/.github/workflows/buf-lint.yml @@ -12,7 +12,7 @@ jobs: name: lint runs-on: ubuntu-latest steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: bufbuild/buf-setup-action@517ee23296d5caf38df31c21945e6a54bbc8a89f # v1.30.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/buf.yml b/.github/workflows/buf.yml index 0fbd01f53..2156e8f19 100644 --- a/.github/workflows/buf.yml +++ b/.github/workflows/buf.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest if: github.repository_owner == 'prometheus' steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: bufbuild/buf-setup-action@517ee23296d5caf38df31c21945e6a54bbc8a89f # v1.30.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 41043f6c6..cead7abfd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ jobs: # should also be updated. image: quay.io/prometheus/golang-builder:1.22-base steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/setup_environment - run: make GOOPTS=--tags=stringlabels GO_ONLY=1 SKIP_GOLANGCI_LINT=1 @@ -27,7 +27,7 @@ jobs: container: image: quay.io/prometheus/golang-builder:1.22-base steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/setup_environment - run: go test --tags=dedupelabels ./... @@ -43,7 +43,7 @@ jobs: # The go version in this image should be N-1 wrt test_go. image: quay.io/prometheus/golang-builder:1.21-base steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - run: make build # Don't run NPM build; don't run race-detector. - run: make test GO_ONLY=1 test-flags="" @@ -57,7 +57,7 @@ jobs: image: quay.io/prometheus/golang-builder:1.22-base steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/setup_environment with: @@ -74,7 +74,7 @@ jobs: name: Go tests on Windows runs-on: windows-latest steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 with: go-version: 1.22.x @@ -91,7 +91,7 @@ jobs: container: image: quay.io/prometheus/golang-builder:1.22-base steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - run: go install ./cmd/promtool/. - run: go install github.com/google/go-jsonnet/cmd/jsonnet@latest - run: go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest @@ -114,7 +114,7 @@ jobs: matrix: thread: [ 0, 1, 2 ] steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/build with: @@ -137,7 +137,7 @@ jobs: # Whenever the Go version is updated here, .promu.yml # should also be updated. steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/build with: @@ -148,7 +148,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: Install Go uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 with: @@ -161,7 +161,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: Install Go uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 with: @@ -188,7 +188,7 @@ jobs: needs: [test_ui, test_go, test_go_more, test_go_oldest, test_windows, golangci, codeql, build_all] if: github.event_name == 'push' && github.event.ref == 'refs/heads/main' steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/publish_main with: @@ -202,7 +202,7 @@ jobs: needs: [test_ui, test_go, test_go_more, test_go_oldest, test_windows, golangci, codeql, build_all] if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.') steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/publish_release with: @@ -217,7 +217,7 @@ jobs: needs: [test_ui, codeql] steps: - name: Checkout - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - name: Install nodejs uses: actions/setup-node@60edb5dd545a775178f52524783378180af0d1f8 # v4.0.2 diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 836fb2568..561c22eab 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -24,7 +24,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: Initialize CodeQL uses: github/codeql-action/init@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # v3.22.12 diff --git a/.github/workflows/container_description.yml b/.github/workflows/container_description.yml index d0368eaa1..a7d7e150c 100644 --- a/.github/workflows/container_description.yml +++ b/.github/workflows/container_description.yml @@ -17,7 +17,7 @@ jobs: if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. steps: - name: git checkout - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: Set docker hub repo name run: echo "DOCKER_REPO_NAME=$(make docker-repo-name)" >> $GITHUB_ENV - name: Push README to Dockerhub @@ -37,7 +37,7 @@ jobs: if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. steps: - name: git checkout - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: Set quay.io org name run: echo "DOCKER_REPO=$(echo quay.io/${GITHUB_REPOSITORY_OWNER} | tr -d '-')" >> $GITHUB_ENV - name: Set quay.io repo name diff --git a/.github/workflows/repo_sync.yml b/.github/workflows/repo_sync.yml index 3458d7b11..f1c7ca5d0 100644 --- a/.github/workflows/repo_sync.yml +++ b/.github/workflows/repo_sync.yml @@ -13,7 +13,7 @@ jobs: container: image: quay.io/prometheus/golang-builder steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - run: ./scripts/sync_repo_files.sh env: GITHUB_TOKEN: ${{ secrets.PROMBOT_GITHUB_TOKEN }} diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 51ff643ab..ea13499d5 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -21,7 +21,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # tag=v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # tag=v4.1.4 with: persist-credentials: false From 781815f064a37391c5ff8327863b1a479f93e891 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 May 2024 23:05:58 +0000 Subject: [PATCH 39/44] build(deps): bump actions/upload-artifact from 4.3.1 to 4.3.3 Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.3.1 to 4.3.3. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/5d5d22a31266ced268874388b861e4b58bb5c2f3...65462800fd760344b1a7b4382951275a0abb4808) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/fuzzing.yml | 2 +- .github/workflows/scorecards.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index 4c19563eb..dc510e596 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -21,7 +21,7 @@ jobs: fuzz-seconds: 600 dry-run: false - name: Upload Crash - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 if: failure() && steps.build.outcome == 'success' with: name: artifacts diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 51ff643ab..3f03c0c55 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -37,7 +37,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # tag=v4.3.1 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # tag=v4.3.3 with: name: SARIF file path: results.sarif From b90e1dfe6a7131580b289ef89b1daffb0b3447f8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 May 2024 23:21:27 +0000 Subject: [PATCH 40/44] build(deps): bump github.com/prometheus/prometheus Bumps [github.com/prometheus/prometheus](https://github.com/prometheus/prometheus) from 0.51.1 to 0.51.2. - [Release notes](https://github.com/prometheus/prometheus/releases) - [Changelog](https://github.com/prometheus/prometheus/blob/main/CHANGELOG.md) - [Commits](https://github.com/prometheus/prometheus/compare/v0.51.1...v0.51.2) --- updated-dependencies: - dependency-name: github.com/prometheus/prometheus dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- documentation/examples/remote_storage/go.mod | 2 +- documentation/examples/remote_storage/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/documentation/examples/remote_storage/go.mod b/documentation/examples/remote_storage/go.mod index dff988131..1ab2cec13 100644 --- a/documentation/examples/remote_storage/go.mod +++ b/documentation/examples/remote_storage/go.mod @@ -10,7 +10,7 @@ require ( github.com/influxdata/influxdb v1.11.5 github.com/prometheus/client_golang v1.19.0 github.com/prometheus/common v0.53.0 - github.com/prometheus/prometheus v0.51.1 + github.com/prometheus/prometheus v0.51.2 github.com/stretchr/testify v1.9.0 ) diff --git a/documentation/examples/remote_storage/go.sum b/documentation/examples/remote_storage/go.sum index b145f362f..9506ae638 100644 --- a/documentation/examples/remote_storage/go.sum +++ b/documentation/examples/remote_storage/go.sum @@ -279,8 +279,8 @@ github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4O github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= -github.com/prometheus/prometheus v0.51.1 h1:V2e7x2oiUC0Megp26+xjffxBf9EGkyP1iQuGd4VjUSU= -github.com/prometheus/prometheus v0.51.1/go.mod h1:yv4MwOn3yHMQ6MZGHPg/U7Fcyqf+rxqiZfSur6myVtc= +github.com/prometheus/prometheus v0.51.2 h1:U0faf1nT4CB9DkBW87XLJCBi2s8nwWXdTbyzRUAkX0w= +github.com/prometheus/prometheus v0.51.2/go.mod h1:yv4MwOn3yHMQ6MZGHPg/U7Fcyqf+rxqiZfSur6myVtc= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/scaleway/scaleway-sdk-go v1.0.0-beta.25 h1:/8rfZAdFfafRXOgz+ZpMZZWZ5pYggCY9t7e/BvjaBHM= From 753fdd513a5470f4aebefbadc0ae213ace8c6b9e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 May 2024 23:31:06 +0000 Subject: [PATCH 41/44] build(deps): bump github.com/linode/linodego from 1.32.0 to 1.33.0 Bumps [github.com/linode/linodego](https://github.com/linode/linodego) from 1.32.0 to 1.33.0. - [Release notes](https://github.com/linode/linodego/releases) - [Commits](https://github.com/linode/linodego/compare/v1.32.0...v1.33.0) --- updated-dependencies: - dependency-name: github.com/linode/linodego dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 165540e7f..0f279036f 100644 --- a/go.mod +++ b/go.mod @@ -41,7 +41,7 @@ require ( github.com/json-iterator/go v1.1.12 github.com/klauspost/compress v1.17.8 github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b - github.com/linode/linodego v1.32.0 + github.com/linode/linodego v1.33.0 github.com/miekg/dns v1.1.59 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f diff --git a/go.sum b/go.sum index 4b8602b05..457d9a66e 100644 --- a/go.sum +++ b/go.sum @@ -471,8 +471,8 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0 github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM= github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4= -github.com/linode/linodego v1.32.0 h1:OmZzB3iON6uu84VtLFf64uKmAQqJJarvmsVguroioPI= -github.com/linode/linodego v1.32.0/go.mod h1:y8GDP9uLVH4jTB9qyrgw79qfKdYJmNCGUOJmfuiOcmI= +github.com/linode/linodego v1.33.0 h1:cX2FYry7r6CA1ujBMsdqiM4VhvIQtnWsOuVblzfBhCw= +github.com/linode/linodego v1.33.0/go.mod h1:dSJJgIwqZCF5wnpuC6w5cyIbRtcexAm7uVvuJopGB40= github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= From bb513b2722b7c2367b0402c8583c609e538ce9df Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 May 2024 23:31:24 +0000 Subject: [PATCH 42/44] build(deps): bump google.golang.org/api from 0.174.0 to 0.177.0 Bumps [google.golang.org/api](https://github.com/googleapis/google-api-go-client) from 0.174.0 to 0.177.0. - [Release notes](https://github.com/googleapis/google-api-go-client/releases) - [Changelog](https://github.com/googleapis/google-api-go-client/blob/main/CHANGES.md) - [Commits](https://github.com/googleapis/google-api-go-client/compare/v0.174.0...v0.177.0) --- updated-dependencies: - dependency-name: google.golang.org/api dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- go.mod | 10 +++++----- go.sum | 20 ++++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/go.mod b/go.mod index 165540e7f..cd2b70d2a 100644 --- a/go.mod +++ b/go.mod @@ -80,10 +80,10 @@ require ( golang.org/x/sys v0.19.0 golang.org/x/time v0.5.0 golang.org/x/tools v0.20.0 - google.golang.org/api v0.174.0 + google.golang.org/api v0.177.0 google.golang.org/genproto/googleapis/api v0.0.0-20240415180920-8c6c420018be google.golang.org/grpc v1.63.2 - google.golang.org/protobuf v1.33.0 + google.golang.org/protobuf v1.34.0 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.29.3 @@ -94,8 +94,8 @@ require ( ) require ( - cloud.google.com/go/auth v0.2.0 // indirect - cloud.google.com/go/auth/oauth2adapt v0.2.0 // indirect + cloud.google.com/go/auth v0.3.0 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect cloud.google.com/go/compute/metadata v0.3.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.5.2 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect @@ -191,7 +191,7 @@ require ( golang.org/x/mod v0.17.0 // indirect golang.org/x/term v0.19.0 // indirect golang.org/x/text v0.14.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240415180920-8c6c420018be // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240429193739-8cf5692501f6 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gotest.tools/v3 v3.0.3 // indirect diff --git a/go.sum b/go.sum index 4b8602b05..ee7b3acc2 100644 --- a/go.sum +++ b/go.sum @@ -12,10 +12,10 @@ cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bP cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk= cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= -cloud.google.com/go/auth v0.2.0 h1:y6oTcpMSbOcXbwYgUUrvI+mrQ2xbrcdpPgtVbCGTLTk= -cloud.google.com/go/auth v0.2.0/go.mod h1:+yb+oy3/P0geX6DLKlqiGHARGR6EX2GRtYCzWOCQSbU= -cloud.google.com/go/auth/oauth2adapt v0.2.0 h1:FR8zevgQwu+8CqiOT5r6xCmJa3pJC/wdXEEPF1OkNhA= -cloud.google.com/go/auth/oauth2adapt v0.2.0/go.mod h1:AfqujpDAlTfLfeCIl/HJZZlIxD8+nJoZ5e0x1IxGq5k= +cloud.google.com/go/auth v0.3.0 h1:PRyzEpGfx/Z9e8+lHsbkoUVXD0gnu4MNmm7Gp8TQNIs= +cloud.google.com/go/auth v0.3.0/go.mod h1:lBv6NKTWp8E3LPzmO1TbiiRKc4drLOfHsgmlH9ogv5w= +cloud.google.com/go/auth/oauth2adapt v0.2.2 h1:+TTV8aXpjeChS9M+aTtN/TjdQnzJvmzKFt//oWu7HX4= +cloud.google.com/go/auth/oauth2adapt v0.2.2/go.mod h1:wcYjgpZI9+Yu7LyYBg4pqSiaRkfEK3GQcpb7C/uyF1Q= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= @@ -1045,8 +1045,8 @@ google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/ google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= -google.golang.org/api v0.174.0 h1:zB1BWl7ocxfTea2aQ9mgdzXjnfPySllpPOskdnO+q34= -google.golang.org/api v0.174.0/go.mod h1:aC7tB6j0HR1Nl0ni5ghpx6iLasmAX78Zkh/wgxAAjLg= +google.golang.org/api v0.177.0 h1:8a0p/BbPa65GlqGWtUKxot4p0TV8OGOfyTjtmkXNXmk= +google.golang.org/api v0.177.0/go.mod h1:srbhue4MLjkjbkux5p3dw/ocYOSZTaIEvf7bCOnFQDw= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -1085,8 +1085,8 @@ google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7Fc google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto/googleapis/api v0.0.0-20240415180920-8c6c420018be h1:Zz7rLWqp0ApfsR/l7+zSHhY3PMiH2xqgxlfYfAfNpoU= google.golang.org/genproto/googleapis/api v0.0.0-20240415180920-8c6c420018be/go.mod h1:dvdCTIoAGbkWbcIKBniID56/7XHTt6WfxXNMxuziJ+w= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240415180920-8c6c420018be h1:LG9vZxsWGOmUKieR8wPAUR3u3MpnYFQZROPIMaXh7/A= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240415180920-8c6c420018be/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240429193739-8cf5692501f6 h1:DujSIu+2tC9Ht0aPNA7jgj23Iq8Ewi5sgkQ++wdvonE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240429193739-8cf5692501f6/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY= google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.0/go.mod h1:chYK+tFQF0nDUGJgXMSgLCQk3phJEuONr2DCgLDdAQM= @@ -1118,8 +1118,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.34.0 h1:Qo/qEd2RZPCf2nKuorzksSknv0d3ERwp1vFG38gSmH4= +google.golang.org/protobuf v1.34.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= From d2caf51874aab00aee17e4fd2efd02c879dfbbc6 Mon Sep 17 00:00:00 2001 From: Kushal shukla <85934954+kushalShukla-web@users.noreply.github.com> Date: Fri, 3 May 2024 18:12:39 +0530 Subject: [PATCH 43/44] removed formateoverview section (#13994) docs: Remove outdated information about remote-read API --------- Signed-off-by: kushagra Shukla Signed-off-by: Kushal shukla <85934954+kushalShukla-web@users.noreply.github.com> Signed-off-by: Arthur Silva Sens Co-authored-by: Arthur Silva Sens --- docs/querying/remote_read_api.md | 60 +------------------------------- 1 file changed, 1 insertion(+), 59 deletions(-) diff --git a/docs/querying/remote_read_api.md b/docs/querying/remote_read_api.md index e3dd13306..efbd08e98 100644 --- a/docs/querying/remote_read_api.md +++ b/docs/querying/remote_read_api.md @@ -5,63 +5,7 @@ sort_rank: 7 # Remote Read API -This is not currently considered part of the stable API and is subject to change -even between non-major version releases of Prometheus. - -## Format overview - -The API response format is JSON. Every successful API request returns a `2xx` -status code. - -Invalid requests that reach the API handlers return a JSON error object -and one of the following HTTP response codes: - -- `400 Bad Request` when parameters are missing or incorrect. -- `422 Unprocessable Entity` when an expression can't be executed - ([RFC4918](https://tools.ietf.org/html/rfc4918#page-78)). -- `503 Service Unavailable` when queries time out or abort. - -Other non-`2xx` codes may be returned for errors occurring before the API -endpoint is reached. - -An array of warnings may be returned if there are errors that do -not inhibit the request execution. All of the data that was successfully -collected will be returned in the data field. - -The JSON response envelope format is as follows: - -``` -{ - "status": "success" | "error", - "data": , - - // Only set if status is "error". The data field may still hold - // additional data. - "errorType": "", - "error": "", - - // Only if there were warnings while executing the request. - // There will still be data in the data field. - "warnings": [""] -} -``` - -Generic placeholders are defined as follows: - -* ``: Input timestamps may be provided either in -[RFC3339](https://www.ietf.org/rfc/rfc3339.txt) format or as a Unix timestamp -in seconds, with optional decimal places for sub-second precision. Output -timestamps are always represented as Unix timestamps in seconds. -* ``: Prometheus [time series -selectors](basics.md#time-series-selectors) like `http_requests_total` or -`http_requests_total{method=~"(GET|POST)"}` and need to be URL-encoded. -* ``: [Prometheus duration strings](basics.md#time_durations). -For example, `5m` refers to a duration of 5 minutes. -* ``: boolean values (strings `true` and `false`). - -Note: Names of query parameters that may be repeated end with `[]`. - -## Remote Read API +> This is not currently considered part of the stable API and is subject to change even between non-major version releases of Prometheus. This API provides data read functionality from Prometheus. This interface expects [snappy](https://github.com/google/snappy) compression. The API definition is located [here](https://github.com/prometheus/prometheus/blob/master/prompb/remote.proto). @@ -79,5 +23,3 @@ This returns a message that includes a list of raw samples. These streamed chunks utilize an XOR algorithm inspired by the [Gorilla](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf) compression to encode the chunks. However, it provides resolution to the millisecond instead of to the second. - - From c10186eeea1edbe466f8738fc8fcc8871a334f82 Mon Sep 17 00:00:00 2001 From: gotjosh Date: Fri, 3 May 2024 14:23:46 +0100 Subject: [PATCH 44/44] BUGFIX: Mark the rule's restoration process as completed always (#14048) * BUGFIX: Mark the rule's restoration process as completed always In https://github.com/prometheus/prometheus/pull/13980 I introduced a change to reduce the number of queries executed when we restore alert statuses. With this, the querying semantics changed as we now need to go through all series before we enter the alert restoration loop and I missed the fact that exiting early when there are no rules to restore would lead to an incomplete restoration. An alert being restored is used as a proxy for "we're now ready to write `ALERTS/ALERTS_FOR_SERIES` metrics" so as a result we weren't writing the series if we didn't restore anything the first time around. --------- Signed-off-by: gotjosh --- rules/group.go | 6 +++++- rules/manager_test.go | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/rules/group.go b/rules/group.go index 3a97c9564..1f4757de3 100644 --- a/rules/group.go +++ b/rules/group.go @@ -672,6 +672,9 @@ func (g *Group) RestoreForState(ts time.Time) { "stage", "Select", "err", err, ) + // Even if we failed to query the `ALERT_FOR_STATE` series, we currently have no way to retry the restore process. + // So the best we can do is mark the rule as restored and let it eventually fire. + alertRule.SetRestored(true) continue } @@ -683,7 +686,8 @@ func (g *Group) RestoreForState(ts time.Time) { // No results for this alert rule. if len(seriesByLabels) == 0 { - level.Debug(g.logger).Log("msg", "Failed to find a series to restore the 'for' state", labels.AlertName, alertRule.Name()) + level.Debug(g.logger).Log("msg", "No series found to restore the 'for' state of the alert rule", labels.AlertName, alertRule.Name()) + alertRule.SetRestored(true) continue } diff --git a/rules/manager_test.go b/rules/manager_test.go index 07159145f..c45569bef 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -482,6 +482,9 @@ func TestForStateRestore(t *testing.T) { return labels.Compare(got[i].Labels, got[j].Labels) < 0 }) + // In all cases, we expect the restoration process to have completed. + require.Truef(t, newRule.Restored(), "expected the rule restoration process to have completed") + // Checking if we have restored it correctly. switch { case tt.noRestore: