mirror of
https://github.com/prometheus/prometheus.git
synced 2024-11-09 23:24:05 -08:00
BUGFIX: Mark the rule's restoration process as completed always (#14048)
* BUGFIX: Mark the rule's restoration process as completed always. In https://github.com/prometheus/prometheus/pull/13980 I introduced a change to reduce the number of queries executed when we restore alert statuses. With that change the querying semantics changed: we now need to go through all series before we enter the alert restoration loop, and I missed the fact that exiting early when there is nothing to restore would leave the restoration process incomplete. A rule being marked as restored is used as a proxy for "we're now ready to write `ALERTS`/`ALERTS_FOR_STATE` metrics", so as a result we weren't writing those series if we didn't restore anything the first time around. --------- Signed-off-by: gotjosh <josue.abreu@gmail.com>
This commit is contained in:
parent
d2caf51874
commit
c10186eeea
|
@ -672,6 +672,9 @@ func (g *Group) RestoreForState(ts time.Time) {
|
|||
"stage", "Select",
|
||||
"err", err,
|
||||
)
|
||||
// Even if we failed to query the `ALERTS_FOR_STATE` series, we currently have no way to retry the restore process.
|
||||
// So the best we can do is mark the rule as restored and let it eventually fire.
|
||||
alertRule.SetRestored(true)
|
||||
continue
|
||||
}
|
||||
|
||||
|
@ -683,7 +686,8 @@ func (g *Group) RestoreForState(ts time.Time) {
|
|||
|
||||
// No results for this alert rule.
|
||||
if len(seriesByLabels) == 0 {
|
||||
level.Debug(g.logger).Log("msg", "Failed to find a series to restore the 'for' state", labels.AlertName, alertRule.Name())
|
||||
level.Debug(g.logger).Log("msg", "No series found to restore the 'for' state of the alert rule", labels.AlertName, alertRule.Name())
|
||||
alertRule.SetRestored(true)
|
||||
continue
|
||||
}
|
||||
|
||||
|
|
|
@ -482,6 +482,9 @@ func TestForStateRestore(t *testing.T) {
|
|||
return labels.Compare(got[i].Labels, got[j].Labels) < 0
|
||||
})
|
||||
|
||||
// In all cases, we expect the restoration process to have completed.
|
||||
require.Truef(t, newRule.Restored(), "expected the rule restoration process to have completed")
|
||||
|
||||
// Checking if we have restored it correctly.
|
||||
switch {
|
||||
case tt.noRestore:
|
||||
|
|
Loading…
Reference in a new issue