prometheus/discovery/manager_test.go
machine424 d23d196db5
fix(discovery): prevent the manager from storing stale targetGroups
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
2024-08-30 14:39:31 +02:00


// Copyright 2016 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package discovery
import (
"context"
"fmt"
"sort"
"strconv"
"sync"
"testing"
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
client_testutil "github.com/prometheus/client_golang/prometheus/testutil"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/testutil"
)
func TestMain(m *testing.M) {
testutil.TolerantVerifyLeak(m)
}
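// NewTestMetrics registers fresh refresh and SD metrics on the given registerer
// and fails the test if registration returns an error.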
func NewTestMetrics(t *testing.T, reg prometheus.Registerer) (RefreshMetricsManager, map[string]DiscovererMetrics) {
refreshMetrics := NewRefreshMetrics(reg)
sdMetrics, err := RegisterSDMetrics(reg, refreshMetrics)
require.NoError(t, err)
return refreshMetrics, sdMetrics
}
// TestTargetUpdatesOrder checks that the target updates are received in the expected order.
func TestTargetUpdatesOrder(t *testing.T) {
// The order in which the updates are sent is determined by the interval passed to the mock discovery adapter.
// The final targets array is ordered alphabetically by the name of the discoverer.
// For example, discoverer "A" with targets "t2,t3" and discoverer "B" with targets "t1,t2" will result in "t2,t3,t1,t2" after the merge.
testCases := []struct {
title string
updates map[string][]update
expectedTargets [][]*targetgroup.Group
}{
{
title: "Single TP no updates",
updates: map[string][]update{
"tp1": {},
},
expectedTargets: nil,
},
{
title: "Multiple TPs no updates",
updates: map[string][]update{
"tp1": {},
"tp2": {},
"tp3": {},
},
expectedTargets: nil,
},
{
title: "Single TP empty initials",
updates: map[string][]update{
"tp1": {
{
targetGroups: []targetgroup.Group{},
interval: 5 * time.Millisecond,
},
},
},
expectedTargets: [][]*targetgroup.Group{
{},
},
},
{
title: "Multiple TPs empty initials",
updates: map[string][]update{
"tp1": {
{
targetGroups: []targetgroup.Group{},
interval: 5 * time.Millisecond,
},
},
"tp2": {
{
targetGroups: []targetgroup.Group{},
interval: 200 * time.Millisecond,
},
},
"tp3": {
{
targetGroups: []targetgroup.Group{},
interval: 100 * time.Millisecond,
},
},
},
expectedTargets: [][]*targetgroup.Group{
{},
{},
{},
},
},
{
title: "Single TP initials only",
updates: map[string][]update{
"tp1": {
{
targetGroups: []targetgroup.Group{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
},
},
},
},
expectedTargets: [][]*targetgroup.Group{
{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
},
},
},
{
title: "Multiple TPs initials only",
updates: map[string][]update{
"tp1": {
{
targetGroups: []targetgroup.Group{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
},
},
},
"tp2": {
{
targetGroups: []targetgroup.Group{
{
Source: "tp2_group1",
Targets: []model.LabelSet{{"__instance__": "3"}},
},
},
interval: 10 * time.Millisecond,
},
},
},
expectedTargets: [][]*targetgroup.Group{
{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
}, {
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
{
Source: "tp2_group1",
Targets: []model.LabelSet{{"__instance__": "3"}},
},
},
},
},
{
title: "Single TP initials followed by empty updates",
updates: map[string][]update{
"tp1": {
{
targetGroups: []targetgroup.Group{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
},
interval: 0,
},
{
targetGroups: []targetgroup.Group{
{
Source: "tp1_group1",
Targets: []model.LabelSet{},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{},
},
},
interval: 10 * time.Millisecond,
},
},
},
expectedTargets: [][]*targetgroup.Group{
{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
},
{
{
Source: "tp1_group1",
Targets: []model.LabelSet{},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{},
},
},
},
},
{
title: "Single TP initials and new groups",
updates: map[string][]update{
"tp1": {
{
targetGroups: []targetgroup.Group{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
},
interval: 0,
},
{
targetGroups: []targetgroup.Group{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "3"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "4"}},
},
{
Source: "tp1_group3",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
},
interval: 10 * time.Millisecond,
},
},
},
expectedTargets: [][]*targetgroup.Group{
{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
},
{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "3"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "4"}},
},
{
Source: "tp1_group3",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
},
},
},
{
title: "Multiple TPs initials and new groups",
updates: map[string][]update{
"tp1": {
{
targetGroups: []targetgroup.Group{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
},
interval: 10 * time.Millisecond,
},
{
targetGroups: []targetgroup.Group{
{
Source: "tp1_group3",
Targets: []model.LabelSet{{"__instance__": "3"}},
},
{
Source: "tp1_group4",
Targets: []model.LabelSet{{"__instance__": "4"}},
},
},
interval: 500 * time.Millisecond,
},
},
"tp2": {
{
targetGroups: []targetgroup.Group{
{
Source: "tp2_group1",
Targets: []model.LabelSet{{"__instance__": "5"}},
},
{
Source: "tp2_group2",
Targets: []model.LabelSet{{"__instance__": "6"}},
},
},
interval: 100 * time.Millisecond,
},
{
targetGroups: []targetgroup.Group{
{
Source: "tp2_group3",
Targets: []model.LabelSet{{"__instance__": "7"}},
},
{
Source: "tp2_group4",
Targets: []model.LabelSet{{"__instance__": "8"}},
},
},
interval: 10 * time.Millisecond,
},
},
},
expectedTargets: [][]*targetgroup.Group{
{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
},
{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
{
Source: "tp2_group1",
Targets: []model.LabelSet{{"__instance__": "5"}},
},
{
Source: "tp2_group2",
Targets: []model.LabelSet{{"__instance__": "6"}},
},
},
{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
{
Source: "tp2_group1",
Targets: []model.LabelSet{{"__instance__": "5"}},
},
{
Source: "tp2_group2",
Targets: []model.LabelSet{{"__instance__": "6"}},
},
{
Source: "tp2_group3",
Targets: []model.LabelSet{{"__instance__": "7"}},
},
{
Source: "tp2_group4",
Targets: []model.LabelSet{{"__instance__": "8"}},
},
},
{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
{
Source: "tp1_group3",
Targets: []model.LabelSet{{"__instance__": "3"}},
},
{
Source: "tp1_group4",
Targets: []model.LabelSet{{"__instance__": "4"}},
},
{
Source: "tp2_group1",
Targets: []model.LabelSet{{"__instance__": "5"}},
},
{
Source: "tp2_group2",
Targets: []model.LabelSet{{"__instance__": "6"}},
},
{
Source: "tp2_group3",
Targets: []model.LabelSet{{"__instance__": "7"}},
},
{
Source: "tp2_group4",
Targets: []model.LabelSet{{"__instance__": "8"}},
},
},
},
},
{
title: "One TP initials arrive after other TP updates.",
updates: map[string][]update{
"tp1": {
{
targetGroups: []targetgroup.Group{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
},
interval: 10 * time.Millisecond,
},
{
targetGroups: []targetgroup.Group{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "3"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "4"}},
},
},
interval: 150 * time.Millisecond,
},
},
"tp2": {
{
targetGroups: []targetgroup.Group{
{
Source: "tp2_group1",
Targets: []model.LabelSet{{"__instance__": "5"}},
},
{
Source: "tp2_group2",
Targets: []model.LabelSet{{"__instance__": "6"}},
},
},
interval: 200 * time.Millisecond,
},
{
targetGroups: []targetgroup.Group{
{
Source: "tp2_group1",
Targets: []model.LabelSet{{"__instance__": "7"}},
},
{
Source: "tp2_group2",
Targets: []model.LabelSet{{"__instance__": "8"}},
},
},
interval: 100 * time.Millisecond,
},
},
},
expectedTargets: [][]*targetgroup.Group{
{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
},
{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "3"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "4"}},
},
},
{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "3"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "4"}},
},
{
Source: "tp2_group1",
Targets: []model.LabelSet{{"__instance__": "5"}},
},
{
Source: "tp2_group2",
Targets: []model.LabelSet{{"__instance__": "6"}},
},
},
{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "3"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "4"}},
},
{
Source: "tp2_group1",
Targets: []model.LabelSet{{"__instance__": "7"}},
},
{
Source: "tp2_group2",
Targets: []model.LabelSet{{"__instance__": "8"}},
},
},
},
},
{
title: "Single TP empty update in between",
updates: map[string][]update{
"tp1": {
{
targetGroups: []targetgroup.Group{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
},
interval: 30 * time.Millisecond,
},
{
targetGroups: []targetgroup.Group{
{
Source: "tp1_group1",
Targets: []model.LabelSet{},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{},
},
},
interval: 10 * time.Millisecond,
},
{
targetGroups: []targetgroup.Group{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "3"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "4"}},
},
},
interval: 300 * time.Millisecond,
},
},
},
expectedTargets: [][]*targetgroup.Group{
{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
},
{
{
Source: "tp1_group1",
Targets: []model.LabelSet{},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{},
},
},
{
{
Source: "tp1_group1",
Targets: []model.LabelSet{{"__instance__": "3"}},
},
{
Source: "tp1_group2",
Targets: []model.LabelSet{{"__instance__": "4"}},
},
},
},
},
}
for i, tc := range testCases {
tc := tc
t.Run(tc.title, func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
var totalUpdatesCount int
for _, up := range tc.updates {
if len(up) > 0 {
totalUpdatesCount += len(up)
}
}
provUpdates := make(chan []*targetgroup.Group, totalUpdatesCount)
for _, up := range tc.updates {
go newMockDiscoveryProvider(up...).Run(ctx, provUpdates)
}
for x := 0; x < totalUpdatesCount; x++ {
select {
case <-ctx.Done():
require.FailNow(t, "%d: no update arrived within the timeout limit", x)
case tgs := <-provUpdates:
discoveryManager.updateGroup(poolKey{setName: strconv.Itoa(i), provider: tc.title}, tgs)
for _, got := range discoveryManager.allGroups() {
assertEqualGroups(t, got, tc.expectedTargets[x])
}
}
}
})
}
}
func assertEqualGroups(t *testing.T, got, expected []*targetgroup.Group) {
t.Helper()
// Need to sort by the groups' source as the received order is not guaranteed.
sort.Sort(byGroupSource(got))
sort.Sort(byGroupSource(expected))
require.Equal(t, expected, got)
}
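// staticConfig builds a StaticConfig with one single-target group per address,
// using the group's index as its Source.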
func staticConfig(addrs ...string) StaticConfig {
var cfg StaticConfig
for i, addr := range addrs {
cfg = append(cfg, &targetgroup.Group{
Source: strconv.Itoa(i),
Targets: []model.LabelSet{
{model.AddressLabel: model.LabelValue(addr)},
},
})
}
return cfg
}
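// verifySyncedPresence asserts that a target with the given label string is present
// (or absent, when present is false) in the synced target groups stored under key.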
func verifySyncedPresence(t *testing.T, tGroups map[string][]*targetgroup.Group, key, label string, present bool) {
t.Helper()
if _, ok := tGroups[key]; !ok {
t.Fatalf("'%s' should be present in Group map keys: %v", key, tGroups)
}
match := false
var mergedTargets string
for _, targetGroups := range tGroups[key] {
for _, l := range targetGroups.Targets {
mergedTargets = mergedTargets + " " + l.String()
if l.String() == label {
match = true
}
}
}
if match != present {
msg := ""
if !present {
msg = "not"
}
t.Fatalf("%q should %s be present in Group labels: %q", label, msg, mergedTargets)
}
}
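// verifyPresence asserts that a target with the given label string is present
// (or absent, when present is false) in the manager's target sets for the given pool key.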
func verifyPresence(t *testing.T, tSets map[poolKey]map[string]*targetgroup.Group, poolKey poolKey, label string, present bool) {
t.Helper()
_, ok := tSets[poolKey]
require.True(t, ok, "'%s' should be present in Pool keys: %v", poolKey, tSets)
match := false
var mergedTargets string
for _, targetGroup := range tSets[poolKey] {
for _, l := range targetGroup.Targets {
mergedTargets = mergedTargets + " " + l.String()
if l.String() == label {
match = true
}
}
}
if match != present {
msg := ""
if !present {
msg = "not"
}
require.FailNow(t, "%q should %s be present in Targets labels: %q", label, msg, mergedTargets)
}
}
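// pk builds the poolKey used by the manager for provider number n of the given
// provider type within a scrape set, e.g. pk("static", "prometheus", 0) yields
// poolKey{setName: "prometheus", provider: "static/0"}.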
func pk(provider, setName string, n int) poolKey {
return poolKey{
setName: setName,
provider: fmt.Sprintf("%s/%d", provider, n),
}
}
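// TestTargetSetTargetGroupsPresentOnConfigReload checks that target groups and synced
// targets survive re-applying an identical configuration.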
func TestTargetSetTargetGroupsPresentOnConfigReload(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
c := map[string]Configs{
"prometheus": {
staticConfig("foo:9090"),
},
}
discoveryManager.ApplyConfig(c)
syncedTargets := <-discoveryManager.SyncCh()
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Len(t, syncedTargets["prometheus"], 1)
p := pk("static", "prometheus", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
require.Len(t, discoveryManager.targets, 1)
discoveryManager.ApplyConfig(c)
syncedTargets = <-discoveryManager.SyncCh()
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
require.Len(t, discoveryManager.targets, 1)
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Len(t, syncedTargets["prometheus"], 1)
}
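// TestTargetSetTargetGroupsPresentOnConfigRename checks that target groups are carried
// over when a scrape set is renamed in the configuration.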
func TestTargetSetTargetGroupsPresentOnConfigRename(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
c := map[string]Configs{
"prometheus": {
staticConfig("foo:9090"),
},
}
discoveryManager.ApplyConfig(c)
syncedTargets := <-discoveryManager.SyncCh()
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Len(t, syncedTargets["prometheus"], 1)
p := pk("static", "prometheus", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
require.Len(t, discoveryManager.targets, 1)
c["prometheus2"] = c["prometheus"]
delete(c, "prometheus")
discoveryManager.ApplyConfig(c)
syncedTargets = <-discoveryManager.SyncCh()
p = pk("static", "prometheus2", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
require.Len(t, discoveryManager.targets, 1)
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus2", "{__address__=\"foo:9090\"}", true)
require.Len(t, syncedTargets["prometheus2"], 1)
}
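// TestTargetSetTargetGroupsPresentOnConfigDuplicateAndDeleteOriginal checks that duplicating
// a scrape set under a new name and then deleting the original keeps only the remaining
// set's target groups.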
func TestTargetSetTargetGroupsPresentOnConfigDuplicateAndDeleteOriginal(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
c := map[string]Configs{
"prometheus": {
staticConfig("foo:9090"),
},
}
discoveryManager.ApplyConfig(c)
<-discoveryManager.SyncCh()
c["prometheus2"] = c["prometheus"]
discoveryManager.ApplyConfig(c)
syncedTargets := <-discoveryManager.SyncCh()
require.Len(t, syncedTargets, 2)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Len(t, syncedTargets["prometheus"], 1)
verifySyncedPresence(t, syncedTargets, "prometheus2", "{__address__=\"foo:9090\"}", true)
require.Len(t, syncedTargets["prometheus2"], 1)
p := pk("static", "prometheus", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
require.Len(t, discoveryManager.targets, 2)
delete(c, "prometheus")
discoveryManager.ApplyConfig(c)
syncedTargets = <-discoveryManager.SyncCh()
p = pk("static", "prometheus2", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
require.Len(t, discoveryManager.targets, 1)
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus2", "{__address__=\"foo:9090\"}", true)
require.Len(t, syncedTargets["prometheus2"], 1)
}
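// TestTargetSetTargetGroupsPresentOnConfigChange checks that already-known targets are synced
// as soon as possible when a new, slow provider is added, that an empty group list is sent for
// the new set in the meantime, and that targets are dropped once their set is removed.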
func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
c := map[string]Configs{
"prometheus": {
staticConfig("foo:9090"),
},
}
discoveryManager.ApplyConfig(c)
syncedTargets := <-discoveryManager.SyncCh()
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Len(t, syncedTargets["prometheus"], 1)
var mu sync.Mutex
c["prometheus2"] = Configs{
lockStaticConfig{
mu: &mu,
config: staticConfig("bar:9090"),
},
}
mu.Lock()
discoveryManager.ApplyConfig(c)
// Original targets should be present as soon as possible.
// An empty list should be sent for prometheus2 to drop any stale targets.
syncedTargets = <-discoveryManager.SyncCh()
mu.Unlock()
require.Len(t, syncedTargets, 2)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Len(t, syncedTargets["prometheus"], 1)
require.Empty(t, syncedTargets["prometheus2"])
// prometheus2 configs should be ready on second sync.
syncedTargets = <-discoveryManager.SyncCh()
require.Len(t, syncedTargets, 2)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Len(t, syncedTargets["prometheus"], 1)
verifySyncedPresence(t, syncedTargets, "prometheus2", "{__address__=\"bar:9090\"}", true)
require.Len(t, syncedTargets["prometheus2"], 1)
p := pk("static", "prometheus", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
p = pk("lockstatic", "prometheus2", 1)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"bar:9090\"}", true)
require.Len(t, discoveryManager.targets, 2)
// Delete part of config and ensure only original targets exist.
delete(c, "prometheus2")
discoveryManager.ApplyConfig(c)
syncedTargets = <-discoveryManager.SyncCh()
require.Len(t, discoveryManager.targets, 1)
verifyPresence(t, discoveryManager.targets, pk("static", "prometheus", 0), "{__address__=\"foo:9090\"}", true)
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Len(t, syncedTargets["prometheus"], 1)
}
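// TestTargetSetRecreatesTargetGroupsOnConfigChange checks that changing the static targets of
// a scrape set replaces the previously discovered target groups instead of merging with them.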
func TestTargetSetRecreatesTargetGroupsOnConfigChange(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
c := map[string]Configs{
"prometheus": {
staticConfig("foo:9090", "bar:9090"),
},
}
discoveryManager.ApplyConfig(c)
syncedTargets := <-discoveryManager.SyncCh()
p := pk("static", "prometheus", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"bar:9090\"}", true)
require.Len(t, discoveryManager.targets, 1)
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"bar:9090\"}", true)
require.Len(t, syncedTargets["prometheus"], 2)
c["prometheus"] = Configs{
staticConfig("foo:9090"),
}
discoveryManager.ApplyConfig(c)
syncedTargets = <-discoveryManager.SyncCh()
require.Len(t, discoveryManager.targets, 1)
p = pk("static", "prometheus", 1)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"bar:9090\"}", false)
require.Len(t, discoveryManager.targets, 1)
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Len(t, syncedTargets["prometheus"], 1)
}
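// TestDiscovererConfigs checks that multiple discoverer configs under the same scrape set get
// their own providers and that the targets of all of them are synced.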
func TestDiscovererConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
c := map[string]Configs{
"prometheus": {
staticConfig("foo:9090", "bar:9090"),
staticConfig("baz:9090"),
},
}
discoveryManager.ApplyConfig(c)
syncedTargets := <-discoveryManager.SyncCh()
p := pk("static", "prometheus", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"bar:9090\"}", true)
p = pk("static", "prometheus", 1)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"baz:9090\"}", true)
require.Len(t, discoveryManager.targets, 2)
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"bar:9090\"}", true)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"baz:9090\"}", true)
require.Len(t, syncedTargets["prometheus"], 3)
}
// TestTargetSetRecreatesEmptyStaticConfigs ensures that reloading a config file after
// removing all targets from the static_configs cleans the corresponding targetGroups entries to avoid leaks and sends an empty update.
// The update is required to signal the consumers that the previous targets should be dropped.
func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
c := map[string]Configs{
"prometheus": {
staticConfig("foo:9090"),
},
}
discoveryManager.ApplyConfig(c)
syncedTargets := <-discoveryManager.SyncCh()
p := pk("static", "prometheus", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Len(t, syncedTargets["prometheus"], 1)
c["prometheus"] = Configs{
StaticConfig{{}},
}
discoveryManager.ApplyConfig(c)
syncedTargets = <-discoveryManager.SyncCh()
require.Len(t, discoveryManager.targets, 1)
p = pk("static", "prometheus", 1)
targetGroups, ok := discoveryManager.targets[p]
require.True(t, ok, "'%v' should be present in targets", p)
// Otherwise the targetGroups will leak, see https://github.com/prometheus/prometheus/issues/12436.
require.Empty(t, targetGroups, "'%v' should no longer have any associated target groups", p)
require.Len(t, syncedTargets, 1, "an update with no targetGroups should still be sent.")
require.Empty(t, syncedTargets["prometheus"])
}
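// TestIdenticalConfigurationsAreCoalesced checks that identical discoverer configs used by
// different scrape sets share a single provider while each set still receives the targets.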
func TestIdenticalConfigurationsAreCoalesced(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, nil, reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
c := map[string]Configs{
"prometheus": {
staticConfig("foo:9090"),
},
"prometheus2": {
staticConfig("foo:9090"),
},
}
discoveryManager.ApplyConfig(c)
syncedTargets := <-discoveryManager.SyncCh()
verifyPresence(t, discoveryManager.targets, pk("static", "prometheus", 0), "{__address__=\"foo:9090\"}", true)
verifyPresence(t, discoveryManager.targets, pk("static", "prometheus2", 0), "{__address__=\"foo:9090\"}", true)
require.Len(t, discoveryManager.providers, 1, "Invalid number of providers.")
require.Len(t, syncedTargets, 2)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Len(t, syncedTargets["prometheus"], 1)
verifySyncedPresence(t, syncedTargets, "prometheus2", "{__address__=\"foo:9090\"}", true)
require.Len(t, syncedTargets["prometheus2"], 1)
}
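// TestApplyConfigDoesNotModifyStaticTargets checks that ApplyConfig leaves the caller's
// static configuration untouched.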
func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) {
originalConfig := Configs{
staticConfig("foo:9090", "bar:9090", "baz:9090"),
}
processedConfig := Configs{
staticConfig("foo:9090", "bar:9090", "baz:9090"),
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
cfgs := map[string]Configs{
"prometheus": processedConfig,
}
discoveryManager.ApplyConfig(cfgs)
<-discoveryManager.SyncCh()
for _, cfg := range cfgs {
require.Equal(t, originalConfig, cfg)
}
}
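// errorConfig is a discovery config whose NewDiscoverer always fails with the wrapped error.
// It is used to exercise the failed-configs gauge.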
type errorConfig struct{ err error }
func (e errorConfig) Name() string { return "error" }
func (e errorConfig) NewDiscoverer(DiscovererOptions) (Discoverer, error) { return nil, e.err }
// NewDiscovererMetrics implements discovery.Config.
func (errorConfig) NewDiscovererMetrics(prometheus.Registerer, RefreshMetricsInstantiator) DiscovererMetrics {
return &NoopDiscovererMetrics{}
}
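// lockStaticConfig wraps a StaticConfig behind a caller-provided mutex so tests can delay
// the discoverer's first update.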
type lockStaticConfig struct {
mu *sync.Mutex
config StaticConfig
}
// NewDiscovererMetrics implements discovery.Config.
func (lockStaticConfig) NewDiscovererMetrics(prometheus.Registerer, RefreshMetricsInstantiator) DiscovererMetrics {
return &NoopDiscovererMetrics{}
}
func (s lockStaticConfig) Name() string { return "lockstatic" }
func (s lockStaticConfig) NewDiscoverer(DiscovererOptions) (Discoverer, error) {
return (lockStaticDiscoverer)(s), nil
}
type lockStaticDiscoverer lockStaticConfig
func (s lockStaticDiscoverer) Run(ctx context.Context, up chan<- []*targetgroup.Group) {
// TODO: existing implementation closes up chan, but documentation explicitly forbids it...?
defer close(up)
s.mu.Lock()
defer s.mu.Unlock()
select {
case <-ctx.Done():
case up <- s.config:
}
}
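// TestGaugeFailedConfigs checks that the failed-configs gauge counts configs whose discoverer
// could not be created and drops back to zero once a valid config is applied.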
func TestGaugeFailedConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
c := map[string]Configs{
"prometheus": {
errorConfig{fmt.Errorf("tests error 0")},
errorConfig{fmt.Errorf("tests error 1")},
errorConfig{fmt.Errorf("tests error 2")},
},
}
discoveryManager.ApplyConfig(c)
<-discoveryManager.SyncCh()
failedCount := client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs)
require.Equal(t, 3.0, failedCount, "Expected to have 3 failed configs.")
c["prometheus"] = Configs{
staticConfig("foo:9090"),
}
discoveryManager.ApplyConfig(c)
<-discoveryManager.SyncCh()
failedCount = client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs)
require.Equal(t, 0.0, failedCount, "Expected to get no failed config.")
}
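// TestCoordinationWithReceiver checks that updates keep reaching the receiver of SyncCh even
// when a provider closes its channel or the receiver is slow to read.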
func TestCoordinationWithReceiver(t *testing.T) {
updateDelay := 100 * time.Millisecond
type expect struct {
delay time.Duration
tgs map[string][]*targetgroup.Group
}
testCases := []struct {
title string
providers map[string]Discoverer
expected []expect
}{
{
title: "Receiver should get all updates even when one provider closes its channel",
providers: map[string]Discoverer{
"once1": &onceProvider{
tgs: []*targetgroup.Group{
{
Source: "tg1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
},
},
"mock1": newMockDiscoveryProvider(
update{
interval: 2 * updateDelay,
targetGroups: []targetgroup.Group{
{
Source: "tg2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
},
},
),
},
expected: []expect{
{
tgs: map[string][]*targetgroup.Group{
"once1": {
{
Source: "tg1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
},
"mock1": {},
},
},
{
tgs: map[string][]*targetgroup.Group{
"once1": {
{
Source: "tg1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
},
"mock1": {
{
Source: "tg2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
},
},
},
},
},
{
title: "Receiver should get all updates even when the channel is blocked",
providers: map[string]Discoverer{
"mock1": newMockDiscoveryProvider(
update{
targetGroups: []targetgroup.Group{
{
Source: "tg1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
},
},
update{
interval: 4 * updateDelay,
targetGroups: []targetgroup.Group{
{
Source: "tg2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
},
},
),
},
expected: []expect{
{
delay: 2 * updateDelay,
tgs: map[string][]*targetgroup.Group{
"mock1": {
{
Source: "tg1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
},
},
},
{
delay: 4 * updateDelay,
tgs: map[string][]*targetgroup.Group{
"mock1": {
{
Source: "tg1",
Targets: []model.LabelSet{{"__instance__": "1"}},
},
{
Source: "tg2",
Targets: []model.LabelSet{{"__instance__": "2"}},
},
},
},
},
},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.title, func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
mgr := NewManager(ctx, nil, reg, sdMetrics)
require.NotNil(t, mgr)
mgr.updatert = updateDelay
go mgr.Run()
for name, p := range tc.providers {
mgr.StartCustomProvider(ctx, name, p)
}
for i, expected := range tc.expected {
time.Sleep(expected.delay)
select {
case <-ctx.Done():
require.FailNow(t, "step %d: no update received in the expected timeframe", i)
case tgs, ok := <-mgr.SyncCh():
require.True(t, ok, "step %d: discovery manager channel is closed", i)
require.Equal(t, len(expected.tgs), len(tgs), "step %d: targets mismatch", i)
for k := range expected.tgs {
_, ok := tgs[k]
require.True(t, ok, "step %d: target group not found: %s", i, k)
assertEqualGroups(t, tgs[k], expected.tgs[k])
}
}
}
})
}
}
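// update describes a single batch of target groups that a mock provider sends after waiting
// for interval.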
type update struct {
targetGroups []targetgroup.Group
interval time.Duration
}
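// mockdiscoveryProvider replays the configured updates, honouring each update's interval,
// and then blocks until the context is cancelled.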
type mockdiscoveryProvider struct {
updates []update
}
func newMockDiscoveryProvider(updates ...update) mockdiscoveryProvider {
tp := mockdiscoveryProvider{
updates: updates,
}
return tp
}
func (tp mockdiscoveryProvider) Run(ctx context.Context, upCh chan<- []*targetgroup.Group) {
for _, u := range tp.updates {
if u.interval > 0 {
select {
case <-ctx.Done():
return
case <-time.After(u.interval):
}
}
tgs := make([]*targetgroup.Group, len(u.targetGroups))
for i := range u.targetGroups {
tgs[i] = &u.targetGroups[i]
}
select {
case <-ctx.Done():
return
case upCh <- tgs:
}
}
<-ctx.Done()
}
// byGroupSource implements sort.Interface so we can sort by the Source field.
type byGroupSource []*targetgroup.Group
func (a byGroupSource) Len() int { return len(a) }
func (a byGroupSource) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a byGroupSource) Less(i, j int) bool { return a[i].Source < a[j].Source }
// onceProvider sends updates once (if any) and closes the update channel.
type onceProvider struct {
tgs []*targetgroup.Group
}
func (o onceProvider) Run(_ context.Context, ch chan<- []*targetgroup.Group) {
if len(o.tgs) > 0 {
ch <- o.tgs
}
close(ch)
}
// TestTargetSetTargetGroupsUpdateDuringApplyConfig is used to detect races when
// ApplyConfig happens at the same time as targets update.
func TestTargetSetTargetGroupsUpdateDuringApplyConfig(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
td := newTestDiscoverer()
c := map[string]Configs{
"prometheus": {
td,
},
}
discoveryManager.ApplyConfig(c)
var wg sync.WaitGroup
wg.Add(2000)
start := make(chan struct{})
for i := 0; i < 1000; i++ {
go func() {
<-start
td.update([]*targetgroup.Group{
{
Targets: []model.LabelSet{
{model.AddressLabel: model.LabelValue("127.0.0.1:9090")},
},
},
})
wg.Done()
}()
}
for i := 0; i < 1000; i++ {
go func(i int) {
<-start
c := map[string]Configs{
fmt.Sprintf("prometheus-%d", i): {
td,
},
}
discoveryManager.ApplyConfig(c)
wg.Done()
}(i)
}
close(start)
wg.Wait()
}
// testDiscoverer is a config and a discoverer that can adjust targets with a
// simple function.
type testDiscoverer struct {
up chan<- []*targetgroup.Group
ready chan struct{}
}
func newTestDiscoverer() *testDiscoverer {
return &testDiscoverer{
ready: make(chan struct{}),
}
}
// NewDiscovererMetrics implements discovery.Config.
func (*testDiscoverer) NewDiscovererMetrics(prometheus.Registerer, RefreshMetricsInstantiator) DiscovererMetrics {
return &NoopDiscovererMetrics{}
}
// Name implements Config.
func (t *testDiscoverer) Name() string {
return "test"
}
// NewDiscoverer implements Config.
func (t *testDiscoverer) NewDiscoverer(DiscovererOptions) (Discoverer, error) {
return t, nil
}
// Run implements Discoverer.
func (t *testDiscoverer) Run(ctx context.Context, up chan<- []*targetgroup.Group) {
t.up = up
close(t.ready)
<-ctx.Done()
}
func (t *testDiscoverer) update(tgs []*targetgroup.Group) {
<-t.ready
t.up <- tgs
}
func TestUnregisterMetrics(t *testing.T) {
reg := prometheus.NewRegistry()
// Check that all metrics can be unregistered, allowing a second manager to be created.
for i := 0; i < 2; i++ {
ctx, cancel := context.WithCancel(context.Background())
refreshMetrics, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
// discoveryManager will be nil if there was an error configuring metrics.
require.NotNil(t, discoveryManager)
// Unregister all metrics.
discoveryManager.UnregisterMetrics()
for _, sdMetric := range sdMetrics {
sdMetric.Unregister()
}
refreshMetrics.Unregister()
cancel()
}
}