Refactor concurrency control

Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
This commit is contained in:
Danny Kopping 2023-10-25 23:05:25 +02:00 committed by Marco Pracucci
parent ed2933ca60
commit e7758d187e
No known key found for this signature in database
GPG key ID: 74C1BD403D2DF9B5
4 changed files with 67 additions and 39 deletions

View file

@ -757,18 +757,19 @@ func main() {
queryEngine = promql.NewEngine(opts) queryEngine = promql.NewEngine(opts)
ruleManager = rules.NewManager(&rules.ManagerOptions{ ruleManager = rules.NewManager(&rules.ManagerOptions{
Appendable: fanoutStorage, Appendable: fanoutStorage,
Queryable: localStorage, Queryable: localStorage,
QueryFunc: rules.EngineQueryFunc(queryEngine, fanoutStorage), QueryFunc: rules.EngineQueryFunc(queryEngine, fanoutStorage),
NotifyFunc: rules.SendAlerts(notifierManager, cfg.web.ExternalURL.String()), NotifyFunc: rules.SendAlerts(notifierManager, cfg.web.ExternalURL.String()),
Context: ctxRule, Context: ctxRule,
ExternalURL: cfg.web.ExternalURL, ExternalURL: cfg.web.ExternalURL,
Registerer: prometheus.DefaultRegisterer, Registerer: prometheus.DefaultRegisterer,
Logger: log.With(logger, "component", "rule manager"), Logger: log.With(logger, "component", "rule manager"),
OutageTolerance: time.Duration(cfg.outageTolerance), OutageTolerance: time.Duration(cfg.outageTolerance),
ForGracePeriod: time.Duration(cfg.forGracePeriod), ForGracePeriod: time.Duration(cfg.forGracePeriod),
ResendDelay: time.Duration(cfg.resendDelay), ResendDelay: time.Duration(cfg.resendDelay),
MaxConcurrentEvals: cfg.maxConcurrentEvals, MaxConcurrentEvals: cfg.maxConcurrentEvals,
ConcurrentEvalsEnabled: cfg.enableConcurrentRuleEval,
}) })
} }

View file

@ -435,11 +435,12 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
} }
eval := func(i int, rule Rule, async bool) { eval := func(i int, rule Rule, async bool) {
if async { defer func() {
defer func() { if async {
g.opts.ConcurrentEvalSema.Release(1) g.opts.ConcurrencyController.Done()
}() }
} }()
logger := log.WithPrefix(g.logger, "name", rule.Name(), "index", i) logger := log.WithPrefix(g.logger, "name", rule.Name(), "index", i)
ctx, sp := otel.Tracer("").Start(ctx, "rule") ctx, sp := otel.Tracer("").Start(ctx, "rule")
sp.SetAttributes(attribute.String("name", rule.Name())) sp.SetAttributes(attribute.String("name", rule.Name()))
@ -568,7 +569,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
// If the rule has no dependencies, it can run concurrently because no other rules in this group depend on its output. // If the rule has no dependencies, it can run concurrently because no other rules in this group depend on its output.
// Try run concurrently if there are slots available. // Try run concurrently if there are slots available.
if g.dependencyMap.isIndependent(rule) && g.opts.ConcurrentEvalSema != nil && g.opts.ConcurrentEvalSema.TryAcquire(1) { if g.dependencyMap.isIndependent(rule) && g.opts.ConcurrencyController.Allow() {
go eval(i, rule, true) go eval(i, rule, true)
} else { } else {
eval(i, rule, false) eval(i, rule, false)

View file

@ -104,20 +104,21 @@ type NotifyFunc func(ctx context.Context, expr string, alerts ...*Alert)
// ManagerOptions bundles options for the Manager. // ManagerOptions bundles options for the Manager.
type ManagerOptions struct { type ManagerOptions struct {
ExternalURL *url.URL ExternalURL *url.URL
QueryFunc QueryFunc QueryFunc QueryFunc
NotifyFunc NotifyFunc NotifyFunc NotifyFunc
Context context.Context Context context.Context
Appendable storage.Appendable Appendable storage.Appendable
Queryable storage.Queryable Queryable storage.Queryable
Logger log.Logger Logger log.Logger
Registerer prometheus.Registerer Registerer prometheus.Registerer
OutageTolerance time.Duration OutageTolerance time.Duration
ForGracePeriod time.Duration ForGracePeriod time.Duration
ResendDelay time.Duration ResendDelay time.Duration
MaxConcurrentEvals int64 MaxConcurrentEvals int64
ConcurrentEvalSema *semaphore.Weighted ConcurrentEvalsEnabled bool
GroupLoader GroupLoader ConcurrencyController ConcurrencyController
GroupLoader GroupLoader
Metrics *Metrics Metrics *Metrics
} }
@ -133,7 +134,7 @@ func NewManager(o *ManagerOptions) *Manager {
o.GroupLoader = FileLoader{} o.GroupLoader = FileLoader{}
} }
o.ConcurrentEvalSema = semaphore.NewWeighted(o.MaxConcurrentEvals) o.ConcurrencyController = NewConcurrencyController(o.ConcurrentEvalsEnabled, o.MaxConcurrentEvals)
m := &Manager{ m := &Manager{
groups: map[string]*Group{}, groups: map[string]*Group{},
@ -408,3 +409,28 @@ func SendAlerts(s Sender, externalURL string) NotifyFunc {
} }
} }
} }
// ConcurrencyController decides whether a unit of work may run concurrently.
// It pairs an on/off switch with a weighted semaphore: Allow tries to take one
// unit from the semaphore and Done gives one unit back.
type ConcurrencyController struct {
// enabled is the on/off switch; when false, Allow always reports false and
// Done does nothing.
enabled bool
// sema is the semaphore that Allow acquires from and Done releases to. It is
// held by pointer, so copies of the controller share the same semaphore.
sema *semaphore.Weighted
}
// NewConcurrencyController returns a ConcurrencyController whose semaphore is
// created with a capacity of maxConcurrency. The semaphore is allocated
// regardless of enabled; the flag only controls whether Allow and Done use it.
func NewConcurrencyController(enabled bool, maxConcurrency int64) ConcurrencyController {
	controller := ConcurrencyController{enabled: enabled}
	controller.sema = semaphore.NewWeighted(maxConcurrency)
	return controller
}
// Allow reports whether the caller may proceed concurrently. It returns false
// when the controller is disabled; otherwise it attempts to acquire a single
// unit from the semaphore and returns whether that attempt succeeded. A caller
// that receives true is expected to call Done once it has finished.
func (c ConcurrencyController) Allow() bool {
	// Short-circuit keeps the semaphore untouched while disabled.
	return c.enabled && c.sema.TryAcquire(1)
}
// Done returns a single unit to the semaphore. It is a no-op when the
// controller is disabled, mirroring Allow, which never acquires in that case.
func (c ConcurrencyController) Done() {
	if c.enabled {
		c.sema.Release(1)
	}
}

View file

@ -30,7 +30,6 @@ import (
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"go.uber.org/atomic" "go.uber.org/atomic"
"go.uber.org/goleak" "go.uber.org/goleak"
"golang.org/x/sync/semaphore"
"gopkg.in/yaml.v2" "gopkg.in/yaml.v2"
"github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/labels"
@ -1672,7 +1671,7 @@ func TestAsyncRuleEvaluation(t *testing.T) {
for _, group := range groups { for _, group := range groups {
// Allow up to 2 concurrent rule evaluations. // Allow up to 2 concurrent rule evaluations.
group.opts.ConcurrentEvalSema = semaphore.NewWeighted(2) group.opts.ConcurrencyController = NewConcurrencyController(true, 2)
require.Len(t, group.rules, expectedRules) require.Len(t, group.rules, expectedRules)
start := time.Now() start := time.Now()
@ -1722,10 +1721,11 @@ func TestBoundedRuleEvalConcurrency(t *testing.T) {
files := []string{"fixtures/rules_multiple_groups.yaml"} files := []string{"fixtures/rules_multiple_groups.yaml"}
ruleManager := NewManager(&ManagerOptions{ ruleManager := NewManager(&ManagerOptions{
Context: context.Background(), Context: context.Background(),
Logger: log.NewNopLogger(), Logger: log.NewNopLogger(),
Appendable: storage, Appendable: storage,
MaxConcurrentEvals: maxConcurrency, ConcurrentEvalsEnabled: true,
MaxConcurrentEvals: maxConcurrency,
QueryFunc: func(ctx context.Context, q string, ts time.Time) (promql.Vector, error) { QueryFunc: func(ctx context.Context, q string, ts time.Time) (promql.Vector, error) {
inflightQueries.Add(1) inflightQueries.Add(1)
defer func() { defer func() {