Option to align rule group's evaluation time to interval (#400)

* Allow a rule group's evaluation timestamp to be aligned on the evaluation interval.

Signed-off-by: Peter Štibraný <pstibrany@gmail.com>
This commit is contained in:
Peter Štibraný 2023-01-19 14:51:26 +01:00 committed by GitHub
parent fa6d2a8ede
commit 806e71e828
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 163 additions and 58 deletions

View file

@ -141,6 +141,7 @@ type RuleGroup struct {
Limit int `yaml:"limit,omitempty"`
Rules []RuleNode `yaml:"rules"`
SourceTenants []string `yaml:"source_tenants,omitempty"`
AlignExecutionTimeOnInterval bool `yaml:"align_execution_time_on_interval,omitempty"`
}
// Rule describes an alerting or recording rule.

View file

@ -0,0 +1,27 @@
groups:
  - name: aligned
    align_execution_time_on_interval: true
    interval: 5m
    rules:
      - record: job:http_requests:rate5m
        expr: sum by (job)(rate(http_requests_total[5m]))
  - name: aligned_with_crazy_interval
    align_execution_time_on_interval: true
    interval: 1m27s
    rules:
      - record: job:http_requests:rate5m
        expr: sum by (job)(rate(http_requests_total[5m]))
  - name: unaligned_default
    interval: 5m
    rules:
      - record: job:http_requests:rate5m
        expr: sum by (job)(rate(http_requests_total[5m]))
  - name: unaligned_explicit
    interval: 5m
    align_execution_time_on_interval: false
    rules:
      - record: job:http_requests:rate5m
        expr: sum by (job)(rate(http_requests_total[5m]))

View file

@ -271,6 +271,7 @@ type Group struct {
metrics *Metrics
ruleGroupPostProcessFunc RuleGroupPostProcessFunc
alignExecutionTimeOnInterval bool
}
// This function will be used before each rule group evaluation if not nil.
@ -288,6 +289,7 @@ type GroupOptions struct {
EvaluationDelay *time.Duration
done chan struct{}
RuleGroupPostProcessFunc RuleGroupPostProcessFunc
AlignExecutionTimeOnInterval bool
}
// NewGroup makes a new Group with the given name, options, and rules.
@ -325,6 +327,7 @@ func NewGroup(o GroupOptions) *Group {
logger: log.With(o.Opts.Logger, "file", o.File, "group", o.Name),
metrics: metrics,
ruleGroupPostProcessFunc: o.RuleGroupPostProcessFunc,
alignExecutionTimeOnInterval: o.AlignExecutionTimeOnInterval,
}
}
@ -547,11 +550,13 @@ func (g *Group) setLastEvaluation(ts time.Time) {
// EvalTimestamp returns the immediately preceding consistently slotted evaluation time.
func (g *Group) EvalTimestamp(startTime int64) time.Time {
var (
var offset int64
if !g.alignExecutionTimeOnInterval {
offset = int64(g.hash() % uint64(g.interval))
adjNow = startTime - offset
base = adjNow - (adjNow % int64(g.interval))
)
}
adjNow := startTime - offset
base := adjNow - (adjNow % int64(g.interval))
return time.Unix(0, base+offset).UTC()
}
@ -928,6 +933,10 @@ func (g *Group) Equals(ng *Group) bool {
return false
}
if g.alignExecutionTimeOnInterval != ng.alignExecutionTimeOnInterval {
return false
}
for i, gr := range g.rules {
if gr.String() != ng.rules[i].String() {
return false
@ -1207,6 +1216,7 @@ func (m *Manager) LoadGroups(
EvaluationDelay: (*time.Duration)(rg.EvaluationDelay),
done: m.done,
RuleGroupPostProcessFunc: ruleGroupPostProcessFunc,
AlignExecutionTimeOnInterval: rg.AlignExecutionTimeOnInterval,
})
}
}

View file

@ -919,6 +919,71 @@ func TestUpdateSetsSourceTenants(t *testing.T) {
}
}
// TestAlignEvaluationTimeOnInterval parses fixtures/rules_with_alignment.yaml,
// hands the groups to a rule manager, and checks for each group whether the
// timestamp returned by EvalTimestamp is a multiple of the group's interval.
func TestAlignEvaluationTimeOnInterval(t *testing.T) {
	st := teststorage.New(t)
	defer st.Close()

	engine := promql.NewEngine(promql.EngineOpts{
		MaxSamples: 10,
		Timeout:    10 * time.Second,
	})
	mgr := NewManager(&ManagerOptions{
		Appendable: st,
		Queryable:  st,
		QueryFunc:  EngineQueryFunc(engine, st),
		Context:    context.Background(),
		Logger:     log.NewNopLogger(),
	})
	mgr.start()
	defer mgr.Stop()

	parsed, parseErrs := rulefmt.ParseFile("fixtures/rules_with_alignment.yaml")
	require.Empty(t, parseErrs, "file parsing failures")

	rulesFile, err := os.CreateTemp("", "rules.test.*.yaml")
	require.NoError(t, err)
	defer os.Remove(rulesFile.Name())
	defer rulesFile.Close()

	reloadRules(parsed, t, rulesFile, mgr, 0)

	// Every group from the fixture must be present before inspecting eval times.
	groups := mgr.RuleGroups()
	require.Len(t, groups, len(parsed.Groups))

	// findGroup returns the loaded group with the given name, or nil.
	findGroup := func(name string) *Group {
		for _, candidate := range groups {
			if candidate.name == name {
				return candidate
			}
		}
		return nil
	}

	// checkGroup asserts that the named group's eval timestamp is (or is not)
	// a multiple of its interval.
	checkGroup := func(groupName string, expectedAligned bool) {
		g := findGroup(groupName)
		require.NotNil(t, g, "group not found: %s", groupName)

		// If the hash is a multiple of the interval, aligned and unaligned
		// timestamps coincide and the check is meaningless. The hash depends
		// on the path passed to ruleManager.Update, and this test stores the
		// rule file in a temporary location, so this can happen.
		if g.hash()%uint64(g.interval) == 0 {
			t.Skip("skipping test, because rule group hash is divisible by interval, which makes eval timestamp always aligned to the interval")
		}

		now := time.Now()
		ts := g.EvalTimestamp(now.UnixNano())
		remainder := ts.UnixNano() % g.interval.Nanoseconds()

		require.Equal(t, expectedAligned, remainder == 0, "group: %s, hash: %d, now: %d", groupName, g.hash(), now.UnixNano())
	}

	for _, tc := range []struct {
		group   string
		aligned bool
	}{
		{"aligned", true},
		{"aligned_with_crazy_interval", true},
		{"unaligned_default", false},
		{"unaligned_explicit", false},
	} {
		checkGroup(tc.group, tc.aligned)
	}
}
func TestGroupEvaluationContextFuncIsCalledWhenSupplied(t *testing.T) {
type testContextKeyType string
var testContextKey testContextKeyType = "TestGroupEvaluationContextFuncIsCalledWhenSupplied"
@ -980,6 +1045,7 @@ type ruleGroupTest struct {
Limit int `yaml:"limit,omitempty"`
Rules []rulefmt.Rule `yaml:"rules"`
SourceTenants []string `yaml:"source_tenants,omitempty"`
AlignExecutionTimeOnInterval bool `yaml:"align_execution_time_on_interval,omitempty"`
}
func formatRules(r *rulefmt.RuleGroups) ruleGroupsTest {
@ -1003,6 +1069,7 @@ func formatRules(r *rulefmt.RuleGroups) ruleGroupsTest {
Limit: g.Limit,
Rules: rtmp,
SourceTenants: g.SourceTenants,
AlignExecutionTimeOnInterval: g.AlignExecutionTimeOnInterval,
})
}
return ruleGroupsTest{