diff --git a/discovery/manager.go b/discovery/manager.go
index bb4409fea..3b13cf1cc 100644
--- a/discovery/manager.go
+++ b/discovery/manager.go
@@ -93,6 +93,7 @@ func NewManager(ctx context.Context, logger log.Logger) *Manager {
 		targets:        make(map[poolKey]map[string]*targetgroup.Group),
 		discoverCancel: []context.CancelFunc{},
 		ctx:            ctx,
+		updatert:       5 * time.Second,
 	}
 }
 
@@ -111,6 +112,10 @@ type Manager struct {
 	providers []*provider
 	// The sync channels sends the updates in map[targetSetName] where targetSetName is the job value from the scrape config.
 	syncCh chan map[string][]*targetgroup.Group
+
+	// How long to wait before sending updates to the channel. The variable
+	// should only be modified in unit tests.
+	updatert time.Duration
 }
 
 // Run starts the background processing
@@ -166,7 +171,7 @@ func (m *Manager) startProvider(ctx context.Context, p *provider) {
 }
 
 func (m *Manager) updater(ctx context.Context, p *provider, updates chan []*targetgroup.Group) {
-	ticker := time.NewTicker(5 * time.Second)
+	ticker := time.NewTicker(m.updatert)
 	defer ticker.Stop()
 
 	triggerUpdate := make(chan struct{}, 1)
@@ -181,11 +186,10 @@ func (m *Manager) updater(ctx context.Context, p *provider, updates chan []*targ
 				select {
 				case m.syncCh <- m.allGroups(): // Waiting until the receiver can accept the last update.
 					level.Debug(m.logger).Log("msg", "discoverer exited", "provider", p.name)
-					return
 				case <-ctx.Done():
-					return
 				}
+				return
 
 			}
 			for _, s := range p.subs {
 				m.updateGroup(poolKey{setName: s, provider: p.name}, tgs)
diff --git a/discovery/manager_test.go b/discovery/manager_test.go
index d9566201f..d58f1643f 100644
--- a/discovery/manager_test.go
+++ b/discovery/manager_test.go
@@ -65,7 +65,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
 				"tp1": {
 					{
 						targetGroups: []targetgroup.Group{},
-						interval:     5,
+						interval:     5 * time.Millisecond,
 					},
 				},
 			},
@@ -79,19 +79,19 @@ func TestTargetUpdatesOrder(t *testing.T) {
 				"tp1": {
 					{
 						targetGroups: []targetgroup.Group{},
-						interval:     5,
+						interval:     5 * time.Millisecond,
 					},
 				},
 				"tp2": {
 					{
 						targetGroups: []targetgroup.Group{},
-						interval:     200,
+						interval:     200 * time.Millisecond,
 					},
 				},
 				"tp3": {
 					{
 						targetGroups: []targetgroup.Group{},
-						interval:     100,
+						interval:     100 * time.Millisecond,
 					},
 				},
 			},
@@ -156,7 +156,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
 								Targets: []model.LabelSet{{"__instance__": "3"}},
 							},
 						},
-						interval: 10,
+						interval: 10 * time.Millisecond,
 					},
 				},
 			},
@@ -214,7 +214,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
 								Targets: []model.LabelSet{},
 							},
 						},
-						interval: 10,
+						interval: 10 * time.Millisecond,
 					},
 				},
 			},
@@ -273,7 +273,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
 								Targets: []model.LabelSet{{"__instance__": "1"}},
 							},
 						},
-						interval: 10,
+						interval: 10 * time.Millisecond,
 					},
 				},
 			},
@@ -319,7 +319,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
 								Targets: []model.LabelSet{{"__instance__": "2"}},
 							},
 						},
-						interval: 10,
+						interval: 10 * time.Millisecond,
 					},
 					{
 						targetGroups: []targetgroup.Group{
@@ -332,7 +332,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
 								Targets: []model.LabelSet{{"__instance__": "4"}},
 							},
 						},
-						interval: 500,
+						interval: 500 * time.Millisecond,
 					},
 				},
 				"tp2": {
@@ -347,7 +347,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
 								Targets: []model.LabelSet{{"__instance__": "6"}},
 							},
 						},
-						interval: 100,
+						interval: 100 * time.Millisecond,
 					},
 					{
 						targetGroups: []targetgroup.Group{
@@ -360,7 +360,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
 								Targets: []model.LabelSet{{"__instance__": "8"}},
 							},
 						},
-						interval: 10,
+						interval: 10 * time.Millisecond,
 					},
 				},
 			},
@@ -470,7 +470,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
 								Targets: []model.LabelSet{{"__instance__": "2"}},
 							},
 						},
-						interval: 10,
+						interval: 10 * time.Millisecond,
 					},
 					{
 						targetGroups: []targetgroup.Group{
@@ -483,7 +483,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
 								Targets: []model.LabelSet{{"__instance__": "4"}},
 							},
 						},
-						interval: 150,
+						interval: 150 * time.Millisecond,
 					},
 				},
 				"tp2": {
@@ -498,7 +498,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
 								Targets: []model.LabelSet{{"__instance__": "6"}},
 							},
 						},
-						interval: 200,
+						interval: 200 * time.Millisecond,
 					},
 					{
 						targetGroups: []targetgroup.Group{
@@ -511,7 +511,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
 								Targets: []model.LabelSet{{"__instance__": "8"}},
 							},
 						},
-						interval: 100,
+						interval: 100 * time.Millisecond,
 					},
 				},
 			},
@@ -590,7 +590,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
 								Targets: []model.LabelSet{{"__instance__": "2"}},
 							},
 						},
-						interval: 30,
+						interval: 30 * time.Millisecond,
 					},
 					{
 						targetGroups: []targetgroup.Group{
@@ -603,7 +603,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
 								Targets: []model.LabelSet{},
 							},
 						},
-						interval: 10,
+						interval: 10 * time.Millisecond,
 					},
 					{
 						targetGroups: []targetgroup.Group{
@@ -616,7 +616,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
 								Targets: []model.LabelSet{{"__instance__": "4"}},
 							},
 						},
-						interval: 300,
+						interval: 300 * time.Millisecond,
 					},
 				},
 			},
@@ -655,53 +655,69 @@ func TestTargetUpdatesOrder(t *testing.T) {
 		},
 	}
 
-	for testIndex, testCase := range testCases {
-		ctx, cancel := context.WithCancel(context.Background())
-		defer cancel()
-		discoveryManager := NewManager(ctx, log.NewNopLogger())
+	for i, tc := range testCases {
+		tc := tc
+		t.Run(tc.title, func(t *testing.T) {
+			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+			defer cancel()
 
-		var totalUpdatesCount int
+			discoveryManager := NewManager(ctx, log.NewNopLogger())
+			discoveryManager.updatert = 100 * time.Millisecond
 
-		provUpdates := make(chan []*targetgroup.Group)
-		for _, up := range testCase.updates {
-			go newMockDiscoveryProvider(up).Run(ctx, provUpdates)
-			if len(up) > 0 {
-				totalUpdatesCount = totalUpdatesCount + len(up)
+			var totalUpdatesCount int
+			provUpdates := make(chan []*targetgroup.Group)
+			for _, up := range tc.updates {
+				go newMockDiscoveryProvider(up...).Run(ctx, provUpdates)
+				if len(up) > 0 {
+					totalUpdatesCount = totalUpdatesCount + len(up)
+				}
 			}
-		}
 
-	Loop:
-		for x := 0; x < totalUpdatesCount; x++ {
-			select {
-			case <-time.After(10 * time.Second):
-				t.Errorf("%v. %q: no update arrived within the timeout limit", x, testCase.title)
-				break Loop
-			case tgs := <-provUpdates:
-				discoveryManager.updateGroup(poolKey{setName: strconv.Itoa(testIndex), provider: testCase.title}, tgs)
-				for _, received := range discoveryManager.allGroups() {
-					// Need to sort by the Groups source as the received order is not guaranteed.
-					sort.Sort(byGroupSource(received))
-					if !reflect.DeepEqual(received, testCase.expectedTargets[x]) {
-						var receivedFormated string
-						for _, receivedTargets := range received {
-							receivedFormated = receivedFormated + receivedTargets.Source + ":" + fmt.Sprint(receivedTargets.Targets)
-						}
-						var expectedFormated string
-						for _, expectedTargets := range testCase.expectedTargets[x] {
-							expectedFormated = expectedFormated + expectedTargets.Source + ":" + fmt.Sprint(expectedTargets.Targets)
-						}
-
-						t.Errorf("%v. %v: \ntargets mismatch \nreceived: %v \nexpected: %v",
-							x, testCase.title,
-							receivedFormated,
-							expectedFormated)
+		Loop:
+			for x := 0; x < totalUpdatesCount; x++ {
+				select {
+				case <-ctx.Done():
+					t.Errorf("%d: no update arrived within the timeout limit", x)
+					break Loop
+				case tgs := <-provUpdates:
+					discoveryManager.updateGroup(poolKey{setName: strconv.Itoa(i), provider: tc.title}, tgs)
+					for _, got := range discoveryManager.allGroups() {
+						assertEqualGroups(t, got, tc.expectedTargets[x], func(got, expected string) string {
+							return fmt.Sprintf("%d: \ntargets mismatch \ngot: %v \nexpected: %v",
+								x,
+								got,
+								expected)
+						})
 					}
 				}
 			}
-		}
+		})
 	}
 }
 
+func assertEqualGroups(t *testing.T, got, expected []*targetgroup.Group, msg func(got, expected string) string) {
+	t.Helper()
+	format := func(groups []*targetgroup.Group) string {
+		var s string
+		for i, group := range groups {
+			if i > 0 {
+				s += ","
+			}
+			s += group.Source + ":" + fmt.Sprint(group.Targets)
+		}
+		return s
+	}
+
+	// Need to sort by the groups's source as the received order is not guaranteed.
+	sort.Sort(byGroupSource(got))
+	sort.Sort(byGroupSource(expected))
+
+	if !reflect.DeepEqual(got, expected) {
+		t.Errorf(msg(format(got), format(expected)))
+	}
+
+}
+
 func verifyPresence(t *testing.T, tSets map[poolKey]map[string]*targetgroup.Group, poolKey poolKey, label string, present bool) {
 	if _, ok := tSets[poolKey]; !ok {
 		t.Fatalf("'%s' should be present in Pool keys: %v", poolKey, tSets)
 	}
@@ -745,6 +761,7 @@ scrape_configs:
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
 	discoveryManager := NewManager(ctx, log.NewNopLogger())
+	discoveryManager.updatert = 100 * time.Millisecond
 	go discoveryManager.Run()
 
 	c := make(map[string]sd_config.ServiceDiscoveryConfig)
@@ -813,6 +830,7 @@ scrape_configs:
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
 	discoveryManager := NewManager(ctx, nil)
+	discoveryManager.updatert = 100 * time.Millisecond
 	go discoveryManager.Run()
 
 	c := make(map[string]sd_config.ServiceDiscoveryConfig)
@@ -851,6 +869,7 @@ scrape_configs:
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
 	discoveryManager := NewManager(ctx, log.NewNopLogger())
+	discoveryManager.updatert = 100 * time.Millisecond
 	go discoveryManager.Run()
 
 	c := make(map[string]sd_config.ServiceDiscoveryConfig)
@@ -868,6 +887,166 @@ scrape_configs:
 	}
 }
 
+func TestCoordinationWithReceiver(t *testing.T) {
+	updateDelay := 100 * time.Millisecond
+
+	type expect struct {
+		delay time.Duration
+		tgs   map[string][]*targetgroup.Group
+	}
+
+	testCases := []struct {
+		title     string
+		providers map[string]Discoverer
+		expected  []expect
+	}{
+		{
+			title: "Receiver should get all updates even when one provider closes its channel",
+			providers: map[string]Discoverer{
+				"once1": &onceProvider{
+					tgs: []*targetgroup.Group{
+						{
+							Source:  "tg1",
+							Targets: []model.LabelSet{{"__instance__": "1"}},
+						},
+					},
+				},
+				"mock1": newMockDiscoveryProvider(
+					update{
+						interval: 2 * updateDelay,
+						targetGroups: []targetgroup.Group{
+							{
+								Source:  "tg2",
+								Targets: []model.LabelSet{{"__instance__": "2"}},
+							},
+						},
+					},
+				),
+			},
+			expected: []expect{
+				{
+					tgs: map[string][]*targetgroup.Group{
+						"once1": []*targetgroup.Group{
+							{
+								Source:  "tg1",
+								Targets: []model.LabelSet{{"__instance__": "1"}},
+							},
+						},
+					},
+				},
+				{
+					tgs: map[string][]*targetgroup.Group{
+						"once1": []*targetgroup.Group{
+							{
+								Source:  "tg1",
+								Targets: []model.LabelSet{{"__instance__": "1"}},
+							},
+						},
+						"mock1": []*targetgroup.Group{
+							{
+								Source:  "tg2",
+								Targets: []model.LabelSet{{"__instance__": "2"}},
+							},
+						},
+					},
+				},
+			},
+		},
+		{
+			title: "Receiver should get all updates even when the channel is blocked",
+			providers: map[string]Discoverer{
+				"mock1": newMockDiscoveryProvider(
+					update{
+						targetGroups: []targetgroup.Group{
+							{
+								Source:  "tg1",
+								Targets: []model.LabelSet{{"__instance__": "1"}},
+							},
+						},
+					},
+					update{
+						interval: 4 * updateDelay,
+						targetGroups: []targetgroup.Group{
+							{
+								Source:  "tg2",
+								Targets: []model.LabelSet{{"__instance__": "2"}},
+							},
+						},
+					},
+				),
+			},
+			expected: []expect{
+				{
+					delay: 2 * updateDelay,
+					tgs: map[string][]*targetgroup.Group{
+						"mock1": []*targetgroup.Group{
+							{
+								Source:  "tg1",
+								Targets: []model.LabelSet{{"__instance__": "1"}},
+							},
+						},
+					},
+				},
+				{
+					delay: 4 * updateDelay,
+					tgs: map[string][]*targetgroup.Group{
+						"mock1": []*targetgroup.Group{
+							{
+								Source:  "tg1",
+								Targets: []model.LabelSet{{"__instance__": "1"}},
+							},
+							{
+								Source:  "tg2",
+								Targets: []model.LabelSet{{"__instance__": "2"}},
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		tc := tc
+		t.Run(tc.title, func(t *testing.T) {
+			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+			defer cancel()
+
+			mgr := NewManager(ctx, nil)
+			mgr.updatert = updateDelay
+			go mgr.Run()
+
+			for name, p := range tc.providers {
+				mgr.StartCustomProvider(ctx, name, p)
+			}
+
+			for i, expected := range tc.expected {
+				time.Sleep(expected.delay)
+				select {
+				case <-ctx.Done():
+					t.Fatalf("step %d: no update received in the expected timeframe", i)
+				case tgs, ok := <-mgr.SyncCh():
+					if !ok {
+						t.Fatalf("step %d: discovery manager channel is closed", i)
+					}
+					if len(tgs) != len(expected.tgs) {
+						t.Fatalf("step %d: target groups mismatch, got: %d, expected: %d\ngot: %#v\nexpected: %#v",
+							i, len(tgs), len(expected.tgs), tgs, expected.tgs)
+					}
+					for k := range expected.tgs {
+						if _, ok := tgs[k]; !ok {
+							t.Fatalf("step %d: target group not found: %s\ngot: %#v", i, k, tgs)
+						}
+						assertEqualGroups(t, tgs[k], expected.tgs[k], func(got, expected string) string {
+							return fmt.Sprintf("step %d: targets mismatch \ngot: %q \nexpected: %q", i, got, expected)
+						})
+					}
+				}
+			}
+		})
+	}
+}
+
 type update struct {
 	targetGroups []targetgroup.Group
 	interval     time.Duration
@@ -875,33 +1054,37 @@ type update struct {
 
 type mockdiscoveryProvider struct {
 	updates []update
-	up      chan<- []*targetgroup.Group
 }
 
-func newMockDiscoveryProvider(updates []update) mockdiscoveryProvider {
-
+func newMockDiscoveryProvider(updates ...update) mockdiscoveryProvider {
 	tp := mockdiscoveryProvider{
 		updates: updates,
 	}
 	return tp
 }
 
-func (tp mockdiscoveryProvider) Run(ctx context.Context, up chan<- []*targetgroup.Group) {
-	tp.up = up
-	tp.sendUpdates()
-}
-
-func (tp mockdiscoveryProvider) sendUpdates() {
-	for _, update := range tp.updates {
-
-		time.Sleep(update.interval * time.Millisecond)
-
-		tgs := make([]*targetgroup.Group, len(update.targetGroups))
-		for i := range update.targetGroups {
-			tgs[i] = &update.targetGroups[i]
+func (tp mockdiscoveryProvider) Run(ctx context.Context, upCh chan<- []*targetgroup.Group) {
+	for _, u := range tp.updates {
+		if u.interval > 0 {
+			t := time.NewTicker(u.interval)
+			defer t.Stop()
+		Loop:
+			for {
+				select {
+				case <-ctx.Done():
+					return
+				case <-t.C:
+					break Loop
+				}
+			}
 		}
-		tp.up <- tgs
+		tgs := make([]*targetgroup.Group, len(u.targetGroups))
+		for i := range u.targetGroups {
+			tgs[i] = &u.targetGroups[i]
+		}
+		upCh <- tgs
 	}
+	<-ctx.Done()
 }
 
 // byGroupSource implements sort.Interface so we can sort by the Source field.
@@ -910,3 +1093,15 @@ type byGroupSource []*targetgroup.Group
 func (a byGroupSource) Len() int           { return len(a) }
 func (a byGroupSource) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
 func (a byGroupSource) Less(i, j int) bool { return a[i].Source < a[j].Source }
+
+// onceProvider sends updates once (if any) and closes the update channel.
+type onceProvider struct {
+	tgs []*targetgroup.Group
+}
+
+func (o onceProvider) Run(_ context.Context, ch chan<- []*targetgroup.Group) {
+	if len(o.tgs) > 0 {
+		ch <- o.tgs
+	}
+	close(ch)
+}
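A note on the updater hunk in manager.go: the old code returned from inside each select case; the patch hoists a single return after the select, so the shutdown path reads as "try to flush the final snapshot, give up on cancellation, then exit either way". A minimal sketch of that shape, with the payload type swapped to int for brevity (the function and parameter names are ours, not from the patch; in the patch the payload is m.allGroups() sent on m.syncCh):

	// deliverLast tries to hand one final value to the receiver, giving up as
	// soon as ctx is cancelled; the caller returns right afterwards either way.
	func deliverLast(ctx context.Context, out chan<- int, last int) {
		select {
		case out <- last: // receiver accepted the last update
		case <-ctx.Done(): // shutting down; drop the last update
		}
	}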
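The pieces added by this diff compose as follows: updatert is unexported, so only tests in the same package can shorten the manager's send delay, and onceProvider exercises the "provider closes its channel" path. A sketch of a test built from those pieces (the test name, the provider name "once", and the group contents are illustrative, not taken from the patch):

	// Sketch only: assumes it lives in package discovery (manager_test.go),
	// since it touches the unexported updatert field.
	func TestOnceProviderDeliversBeforeClosing(t *testing.T) {
		ctx, cancel := context.WithTimeout(context.Background(), time.Second)
		defer cancel()

		mgr := NewManager(ctx, nil)          // nil logger is accepted, as in the tests above
		mgr.updatert = 10 * time.Millisecond // test-only override of the 5s default
		go mgr.Run()

		// onceProvider (added at the bottom of this diff) sends one update and
		// closes its channel; the updater must still forward it to SyncCh.
		mgr.StartCustomProvider(ctx, "once", &onceProvider{
			tgs: []*targetgroup.Group{{Source: "tg1"}},
		})

		tgs := <-mgr.SyncCh()
		if len(tgs["once"]) != 1 {
			t.Fatalf("expected 1 group for provider %q, got %#v", "once", tgs)
		}
	}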
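The rewritten mockdiscoveryProvider.Run drops the old time.Sleep(update.interval * time.Millisecond), which only worked because the intervals in the test tables were bare numbers (the very thing the N * time.Millisecond changes fix), in favor of a one-shot ticker guarded by a select, so a waiting mock unblocks as soon as the test context is cancelled. The same cancellable-wait pattern reduced to a helper (a sketch; the helper name is ours, and the patch inlines this logic rather than calling a function):

	// waitOrDone blocks for d, but returns early (false) if ctx is cancelled first.
	func waitOrDone(ctx context.Context, d time.Duration) bool {
		t := time.NewTimer(d)
		defer t.Stop()
		select {
		case <-ctx.Done():
			return false
		case <-t.C:
			return true
		}
	}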