diff --git a/.github/workflows/buf-lint.yml b/.github/workflows/buf-lint.yml index 9826693eb6..3b60b82674 100644 --- a/.github/workflows/buf-lint.yml +++ b/.github/workflows/buf-lint.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: bufbuild/buf-setup-action@v1.5.0 + - uses: bufbuild/buf-setup-action@v1.6.0 - uses: bufbuild/buf-lint-action@v1 with: input: 'prompb' diff --git a/.github/workflows/buf.yml b/.github/workflows/buf.yml index 11126f83dc..94db353377 100644 --- a/.github/workflows/buf.yml +++ b/.github/workflows/buf.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: bufbuild/buf-setup-action@v1.5.0 + - uses: bufbuild/buf-setup-action@v1.6.0 - uses: bufbuild/buf-lint-action@v1 with: input: 'prompb' diff --git a/.github/workflows/ui_build_and_release.yml b/.github/workflows/ui_build_and_release.yml new file mode 100644 index 0000000000..f3ca674189 --- /dev/null +++ b/.github/workflows/ui_build_and_release.yml @@ -0,0 +1,44 @@ +name: ui_build_and_release +on: + pull_request: + push: + branches: + - main + tags: + - "v0.[0-9]+.[0-9]+*" +jobs: + release: + name: release + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Install nodejs + uses: actions/setup-node@v3 + with: + node-version-file: "web/ui/.nvmrc" + - uses: actions/cache@v3.0.4 + with: + path: ~/.npm + key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} + restore-keys: | + ${{ runner.os }}-node- + + - name: Check libraries version + ## This step is verifying that the version of each package is matching the tag + if: ${{ github.event_name == 'push' && startsWith(github.ref_name, 'v') }} + run: ./scripts/ui_release.sh --check-package "${{ github.ref_name }}" + - name: build + run: make assets + - name: Copy files before publishing libs + run: ./scripts/ui_release.sh --copy + - name: Publish dry-run libraries + if: ${{ github.event_name == 'pull_request' || github.ref_name == 'main' }} + run: ./scripts/ui_release.sh --publish dry-run + - name: Publish libraries + if: ${{ github.event_name == 'push' && startsWith(github.ref_name, 'v') }} + run: ./scripts/ui_release.sh --publish + env: + # The setup-node action writes an .npmrc file with this env variable + # as the placeholder for the auth token + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} diff --git a/.promu.yml b/.promu.yml index cf58e83d6f..85baf7d19e 100644 --- a/.promu.yml +++ b/.promu.yml @@ -35,5 +35,4 @@ crossbuild: - illumos - linux - netbsd - - openbsd - windows diff --git a/CHANGELOG.md b/CHANGELOG.md index 9215f67bc7..3d9dd46e6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,32 @@ # Changelog +## 2.37.0-rc.0 / 2022-07-05 + +Following data loss by users due to lack of unified buffer cache in OpenBSD, we +will no longer release Prometheus upstream for OpenBSD until a proper solution is +found. #8799 + +* [FEATURE] Nomad SD: New service discovery for Nomad built-in service discovery. #10915 +* [ENHANCEMENT] Kubernetes SD: Allow attaching node labels for endpoint role. #10759 +* [ENHANCEMENT] PromQL: Optimise creation of signature with/without labels. #10667 +* [ENHANCEMENT] TSDB: Memory optimizations. #10873 #10874 +* [ENHANCEMENT] TSDB: Reduce sleep time when reading WAL. #10859 #10878 +* [BUGFIX] Alerting: Fix Alertmanager targets not being updated when alerts were queued. #10948 +* [BUGFIX] Hetzner SD: Make authentication files relative to Prometheus config file. 
#10813 +* [BUGFIX] Promtool: Fix `promtool check config` not erroring properly on failures. #10952 +* [BUGFIX] Scrape: Keep relabeled scrape interval and timeout on reloads. #10916 +* [BUGFIX] TSDB: Don't increment `prometheus_tsdb_compactions_failed_total` when context is canceled. #10772 +* [BUGFIX] TSDB: Fix panic if series is not found when deleting series. #10907 +* [BUGFIX] TSDB: Increase `prometheus_tsdb_mmap_chunk_corruptions_total` on out of sequence errors. #10406 +* [BUGFIX] Uyuni SD: Make authentication files relative to Prometheus configuration file and fix default configuration values. #10813 + +## 2.36.2 / 2022-06-20 + +* [BUGFIX] Fix serving of static assets like fonts and favicon. #10888 + ## 2.36.1 / 2022-06-09 -* [BUGFIX] promtool: Add --lint-fatal option #10840 +* [BUGFIX] promtool: Add --lint-fatal option. #10840 ## 2.36.0 / 2022-05-30 diff --git a/Makefile b/Makefile index 71d3e949e5..c4a9a06cfa 100644 --- a/Makefile +++ b/Makefile @@ -39,6 +39,12 @@ upgrade-npm-deps: @echo ">> upgrading npm dependencies" ./scripts/npm-deps.sh "latest" +.PHONY: ui-bump-version +ui-bump-version: + version=$$(sed s/2/0/ < VERSION) && ./scripts/ui_release.sh --bump-version "$${version}" + cd web/ui && npm install + git add "./web/ui/package-lock.json" "./**/package.json" + .PHONY: ui-install ui-install: cd $(UI_PATH) && npm install diff --git a/RELEASE.md b/RELEASE.md index 077dabe476..88d85e8ace 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -144,6 +144,12 @@ Entries in the `CHANGELOG.md` are meant to be in this order: * `[ENHANCEMENT]` * `[BUGFIX]` +Then bump the UI module version: + +```bash +make ui-bump-version +``` + ### 2. Draft the new release Tag the new release via the following commands: diff --git a/VERSION b/VERSION index 089d1d4e02..1dadcdca60 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.36.1 +2.37.0-rc.0 diff --git a/cmd/prometheus/main_unix_test.go b/cmd/prometheus/main_unix_test.go index d7eed636dd..b49110ea91 100644 --- a/cmd/prometheus/main_unix_test.go +++ b/cmd/prometheus/main_unix_test.go @@ -54,7 +54,7 @@ func TestStartupInterrupt(t *testing.T) { Loop: for x := 0; x < 10; x++ { - // error=nil means prometheus has started so we can send the interrupt + // error=nil means prometheus has started, so we can send the interrupt // signal and wait for the graceful shutdown. if _, err := http.Get(url); err == nil { startedOk = true diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index ad802e4824..96d7f3851a 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -344,6 +344,7 @@ func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings lintConfig, files ruleFiles, err := checkConfig(agentMode, f, checkSyntaxOnly) if err != nil { fmt.Fprintln(os.Stderr, " FAILED:", err) + hasErrors = true failed = true } else { if len(ruleFiles) > 0 { diff --git a/cmd/promtool/main_test.go b/cmd/promtool/main_test.go index d4773fc918..40df3e5248 100644 --- a/cmd/promtool/main_test.go +++ b/cmd/promtool/main_test.go @@ -14,13 +14,16 @@ package main import ( + "errors" "fmt" "net/http" "net/http/httptest" "net/url" "os" + "os/exec" "runtime" "strings" + "syscall" "testing" "time" @@ -30,6 +33,21 @@ import ( "github.com/prometheus/prometheus/model/rulefmt" ) +var promtoolPath = os.Args[0] + +func TestMain(m *testing.M) { + for i, arg := range os.Args { + if arg == "-test.main" { + os.Args = append(os.Args[:i], os.Args[i+1:]...) 
+ main() + return + } + } + + exitCode := m.Run() + os.Exit(exitCode) +} + func TestQueryRange(t *testing.T) { s, getRequest := mockServer(200, `{"status": "success", "data": {"resultType": "matrix", "result": []}}`) defer s.Close() @@ -359,3 +377,59 @@ func TestCheckMetricsExtended(t *testing.T) { }, }, stats) } + +func TestExitCodes(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + for _, c := range []struct { + file string + exitCode int + lintIssue bool + }{ + { + file: "prometheus-config.good.yml", + }, + { + file: "prometheus-config.bad.yml", + exitCode: 1, + }, + { + file: "prometheus-config.nonexistent.yml", + exitCode: 1, + }, + { + file: "prometheus-config.lint.yml", + lintIssue: true, + exitCode: 3, + }, + } { + t.Run(c.file, func(t *testing.T) { + for _, lintFatal := range []bool{true, false} { + t.Run(fmt.Sprintf("%t", lintFatal), func(t *testing.T) { + args := []string{"-test.main", "check", "config", "testdata/" + c.file} + if lintFatal { + args = append(args, "--lint-fatal") + } + tool := exec.Command(promtoolPath, args...) + err := tool.Run() + if c.exitCode == 0 || (c.lintIssue && !lintFatal) { + require.NoError(t, err) + return + } + + require.Error(t, err) + + var exitError *exec.ExitError + if errors.As(err, &exitError) { + status := exitError.Sys().(syscall.WaitStatus) + require.Equal(t, c.exitCode, status.ExitStatus()) + } else { + t.Errorf("unable to retrieve the exit status for promtool: %v", err) + } + }) + } + }) + } +} diff --git a/cmd/promtool/testdata/prometheus-config.bad.yml b/cmd/promtool/testdata/prometheus-config.bad.yml new file mode 100644 index 0000000000..13e262e051 --- /dev/null +++ b/cmd/promtool/testdata/prometheus-config.bad.yml @@ -0,0 +1 @@ +not-prometheus: diff --git a/cmd/promtool/testdata/prometheus-config.good.yml b/cmd/promtool/testdata/prometheus-config.good.yml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cmd/promtool/testdata/prometheus-config.lint.yml b/cmd/promtool/testdata/prometheus-config.lint.yml new file mode 100644 index 0000000000..ffa7c38434 --- /dev/null +++ b/cmd/promtool/testdata/prometheus-config.lint.yml @@ -0,0 +1,2 @@ +rule_files: + - prometheus-rules.lint.yml diff --git a/cmd/promtool/testdata/prometheus-rules.lint.yml b/cmd/promtool/testdata/prometheus-rules.lint.yml new file mode 100644 index 0000000000..b067675de1 --- /dev/null +++ b/cmd/promtool/testdata/prometheus-rules.lint.yml @@ -0,0 +1,17 @@ +groups: +- name: example + rules: + - alert: HighRequestLatency + expr: job:request_latency_seconds:mean5m{job="myjob"} > 0.5 + for: 10m + labels: + severity: page + annotations: + summary: High request latency + - alert: HighRequestLatency + expr: job:request_latency_seconds:mean5m{job="myjob"} > 0.5 + for: 10m + labels: + severity: page + annotations: + summary: High request latency diff --git a/config/config_test.go b/config/config_test.go index dc8ab742ba..54b4e2b111 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -45,6 +45,7 @@ import ( "github.com/prometheus/prometheus/discovery/linode" "github.com/prometheus/prometheus/discovery/marathon" "github.com/prometheus/prometheus/discovery/moby" + "github.com/prometheus/prometheus/discovery/nomad" "github.com/prometheus/prometheus/discovery/openstack" "github.com/prometheus/prometheus/discovery/puppetdb" "github.com/prometheus/prometheus/discovery/scaleway" @@ -513,6 +514,32 @@ var expectedConf = &Config{ }, }, }, + { + JobName: "service-nomad", + + HonorTimestamps: true, + ScrapeInterval: 
model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + + MetricsPath: DefaultScrapeConfig.MetricsPath, + Scheme: DefaultScrapeConfig.Scheme, + HTTPClientConfig: config.DefaultHTTPClientConfig, + + ServiceDiscoveryConfigs: discovery.Configs{ + &nomad.SDConfig{ + AllowStale: true, + Namespace: "default", + RefreshInterval: model.Duration(60 * time.Second), + Region: "global", + Server: "http://localhost:4646", + TagSeparator: ",", + HTTPClientConfig: config.HTTPClientConfig{ + FollowRedirects: true, + EnableHTTP2: true, + }, + }, + }, + }, { JobName: "service-ec2", diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index e217a82ead..a894907c5f 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -214,6 +214,10 @@ scrape_configs: cert_file: valid_cert_file key_file: valid_key_file + - job_name: service-nomad + nomad_sd_configs: + - server: 'http://localhost:4646' + - job_name: service-ec2 ec2_sd_configs: - region: us-east-1 diff --git a/discovery/install/install.go b/discovery/install/install.go index d8a5a82cb4..36e13948d3 100644 --- a/discovery/install/install.go +++ b/discovery/install/install.go @@ -31,6 +31,7 @@ import ( _ "github.com/prometheus/prometheus/discovery/linode" // register linode _ "github.com/prometheus/prometheus/discovery/marathon" // register marathon _ "github.com/prometheus/prometheus/discovery/moby" // register moby + _ "github.com/prometheus/prometheus/discovery/nomad" // register nomad _ "github.com/prometheus/prometheus/discovery/openstack" // register openstack _ "github.com/prometheus/prometheus/discovery/puppetdb" // register puppetdb _ "github.com/prometheus/prometheus/discovery/scaleway" // register scaleway diff --git a/discovery/kubernetes/endpoints.go b/discovery/kubernetes/endpoints.go index 14615a09c7..f1fd82672f 100644 --- a/discovery/kubernetes/endpoints.go +++ b/discovery/kubernetes/endpoints.go @@ -41,9 +41,11 @@ var ( type Endpoints struct { logger log.Logger - endpointsInf cache.SharedInformer - serviceInf cache.SharedInformer - podInf cache.SharedInformer + endpointsInf cache.SharedIndexInformer + serviceInf cache.SharedInformer + podInf cache.SharedInformer + nodeInf cache.SharedInformer + withNodeMetadata bool podStore cache.Store endpointsStore cache.Store @@ -53,19 +55,21 @@ type Endpoints struct { } // NewEndpoints returns a new endpoints discovery. 
-func NewEndpoints(l log.Logger, svc, eps, pod cache.SharedInformer) *Endpoints { +func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer) *Endpoints { if l == nil { l = log.NewNopLogger() } e := &Endpoints{ - logger: l, - endpointsInf: eps, - endpointsStore: eps.GetStore(), - serviceInf: svc, - serviceStore: svc.GetStore(), - podInf: pod, - podStore: pod.GetStore(), - queue: workqueue.NewNamed("endpoints"), + logger: l, + endpointsInf: eps, + endpointsStore: eps.GetStore(), + serviceInf: svc, + serviceStore: svc.GetStore(), + podInf: pod, + podStore: pod.GetStore(), + nodeInf: node, + withNodeMetadata: node != nil, + queue: workqueue.NewNamed("endpoints"), } e.endpointsInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ @@ -118,10 +122,38 @@ func NewEndpoints(l log.Logger, svc, eps, pod cache.SharedInformer) *Endpoints { serviceUpdate(o) }, }) + if e.withNodeMetadata { + e.nodeInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(o interface{}) { + node := o.(*apiv1.Node) + e.enqueueNode(node.Name) + }, + UpdateFunc: func(_, o interface{}) { + node := o.(*apiv1.Node) + e.enqueueNode(node.Name) + }, + DeleteFunc: func(o interface{}) { + node := o.(*apiv1.Node) + e.enqueueNode(node.Name) + }, + }) + } return e } +func (e *Endpoints) enqueueNode(nodeName string) { + endpoints, err := e.endpointsInf.GetIndexer().ByIndex(nodeIndex, nodeName) + if err != nil { + level.Error(e.logger).Log("msg", "Error getting endpoints for node", "node", nodeName, "err", err) + return + } + + for _, endpoint := range endpoints { + e.enqueue(endpoint) + } +} + func (e *Endpoints) enqueue(obj interface{}) { key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) if err != nil { @@ -135,7 +167,12 @@ func (e *Endpoints) enqueue(obj interface{}) { func (e *Endpoints) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { defer e.queue.ShutDown() - if !cache.WaitForCacheSync(ctx.Done(), e.endpointsInf.HasSynced, e.serviceInf.HasSynced, e.podInf.HasSynced) { + cacheSyncs := []cache.InformerSynced{e.endpointsInf.HasSynced, e.serviceInf.HasSynced, e.podInf.HasSynced} + if e.withNodeMetadata { + cacheSyncs = append(cacheSyncs, e.nodeInf.HasSynced) + } + + if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) { if !errors.Is(ctx.Err(), context.Canceled) { level.Error(e.logger).Log("msg", "endpoints informer unable to sync cache") } @@ -257,6 +294,10 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group { target[model.LabelName(endpointHostname)] = lv(addr.Hostname) } + if e.withNodeMetadata { + target = addNodeLabels(target, e.nodeInf, e.logger, addr.NodeName) + } + pod := e.resolvePodRef(addr.TargetRef) if pod == nil { // This target is not a Pod, so don't continue with Pod specific logic. @@ -387,3 +428,31 @@ func (e *Endpoints) addServiceLabels(ns, name string, tg *targetgroup.Group) { tg.Labels = tg.Labels.Merge(serviceLabels(svc)) } + +func addNodeLabels(tg model.LabelSet, nodeInf cache.SharedInformer, logger log.Logger, nodeName *string) model.LabelSet { + if nodeName == nil { + return tg + } + + obj, exists, err := nodeInf.GetStore().GetByKey(*nodeName) + if err != nil { + level.Error(logger).Log("msg", "Error getting node", "node", *nodeName, "err", err) + return tg + } + + if !exists { + return tg + } + + node := obj.(*apiv1.Node) + // Allocate one target label for the node name, + // and two target labels for each node label. 
+ nodeLabelset := make(model.LabelSet, 1+2*len(node.GetLabels())) + nodeLabelset[nodeNameLabel] = lv(*nodeName) + for k, v := range node.GetLabels() { + ln := strutil.SanitizeLabelName(k) + nodeLabelset[model.LabelName(nodeLabelPrefix+ln)] = lv(v) + nodeLabelset[model.LabelName(nodeLabelPresentPrefix+ln)] = presentValue + } + return tg.Merge(nodeLabelset) +} diff --git a/discovery/kubernetes/endpoints_test.go b/discovery/kubernetes/endpoints_test.go index 4d72272564..5fd9460ae2 100644 --- a/discovery/kubernetes/endpoints_test.go +++ b/discovery/kubernetes/endpoints_test.go @@ -478,6 +478,126 @@ func TestEndpointsDiscoveryWithServiceUpdate(t *testing.T) { }.Run(t) } +func TestEndpointsDiscoveryWithNodeMetadata(t *testing.T) { + metadataConfig := AttachMetadataConfig{Node: true} + nodeLabels := map[string]string{"az": "us-east1"} + node := makeNode("foobar", "", "", nodeLabels, nil) + svc := &v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testendpoints", + Namespace: "default", + Labels: map[string]string{ + "app/name": "test", + }, + }, + } + n, _ := makeDiscoveryWithMetadata(RoleEndpoint, NamespaceDiscovery{}, metadataConfig, makeEndpoints(), svc, node) + + k8sDiscoveryTest{ + discovery: n, + expectedMaxItems: 1, + expectedRes: map[string]*targetgroup.Group{ + "endpoints/default/testendpoints": { + Targets: []model.LabelSet{ + { + "__address__": "1.2.3.4:9000", + "__meta_kubernetes_endpoint_hostname": "testendpoint1", + "__meta_kubernetes_endpoint_node_name": "foobar", + "__meta_kubernetes_endpoint_port_name": "testport", + "__meta_kubernetes_endpoint_port_protocol": "TCP", + "__meta_kubernetes_endpoint_ready": "true", + "__meta_kubernetes_node_label_az": "us-east1", + "__meta_kubernetes_node_labelpresent_az": "true", + "__meta_kubernetes_node_name": "foobar", + }, + { + "__address__": "2.3.4.5:9001", + "__meta_kubernetes_endpoint_port_name": "testport", + "__meta_kubernetes_endpoint_port_protocol": "TCP", + "__meta_kubernetes_endpoint_ready": "true", + }, + { + "__address__": "2.3.4.5:9001", + "__meta_kubernetes_endpoint_port_name": "testport", + "__meta_kubernetes_endpoint_port_protocol": "TCP", + "__meta_kubernetes_endpoint_ready": "false", + }, + }, + Labels: model.LabelSet{ + "__meta_kubernetes_namespace": "default", + "__meta_kubernetes_endpoints_name": "testendpoints", + "__meta_kubernetes_service_label_app_name": "test", + "__meta_kubernetes_service_labelpresent_app_name": "true", + "__meta_kubernetes_service_name": "testendpoints", + }, + Source: "endpoints/default/testendpoints", + }, + }, + }.Run(t) +} + +func TestEndpointsDiscoveryWithUpdatedNodeMetadata(t *testing.T) { + nodeLabels := map[string]string{"az": "us-east1"} + nodes := makeNode("foobar", "", "", nodeLabels, nil) + metadataConfig := AttachMetadataConfig{Node: true} + svc := &v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testendpoints", + Namespace: "default", + Labels: map[string]string{ + "app/name": "test", + }, + }, + } + n, c := makeDiscoveryWithMetadata(RoleEndpoint, NamespaceDiscovery{}, metadataConfig, makeEndpoints(), nodes, svc) + + k8sDiscoveryTest{ + discovery: n, + afterStart: func() { + nodes.Labels["az"] = "eu-central1" + c.CoreV1().Nodes().Update(context.Background(), nodes, metav1.UpdateOptions{}) + }, + expectedMaxItems: 2, + expectedRes: map[string]*targetgroup.Group{ + "endpoints/default/testendpoints": { + Targets: []model.LabelSet{ + { + "__address__": "1.2.3.4:9000", + "__meta_kubernetes_endpoint_hostname": "testendpoint1", + "__meta_kubernetes_endpoint_node_name": "foobar", + 
"__meta_kubernetes_endpoint_port_name": "testport", + "__meta_kubernetes_endpoint_port_protocol": "TCP", + "__meta_kubernetes_endpoint_ready": "true", + "__meta_kubernetes_node_label_az": "eu-central1", + "__meta_kubernetes_node_labelpresent_az": "true", + "__meta_kubernetes_node_name": "foobar", + }, + { + "__address__": "2.3.4.5:9001", + "__meta_kubernetes_endpoint_port_name": "testport", + "__meta_kubernetes_endpoint_port_protocol": "TCP", + "__meta_kubernetes_endpoint_ready": "true", + }, + { + "__address__": "2.3.4.5:9001", + "__meta_kubernetes_endpoint_port_name": "testport", + "__meta_kubernetes_endpoint_port_protocol": "TCP", + "__meta_kubernetes_endpoint_ready": "false", + }, + }, + Labels: model.LabelSet{ + "__meta_kubernetes_namespace": "default", + "__meta_kubernetes_endpoints_name": "testendpoints", + "__meta_kubernetes_service_label_app_name": "test", + "__meta_kubernetes_service_labelpresent_app_name": "true", + "__meta_kubernetes_service_name": "testendpoints", + }, + Source: "endpoints/default/testendpoints", + }, + }, + }.Run(t) +} + func TestEndpointsDiscoveryNamespaces(t *testing.T) { epOne := makeEndpoints() epOne.Namespace = "ns1" diff --git a/discovery/kubernetes/endpointslice.go b/discovery/kubernetes/endpointslice.go index 31bc14dd77..6d4c2c8f25 100644 --- a/discovery/kubernetes/endpointslice.go +++ b/discovery/kubernetes/endpointslice.go @@ -42,9 +42,11 @@ var ( type EndpointSlice struct { logger log.Logger - endpointSliceInf cache.SharedInformer + endpointSliceInf cache.SharedIndexInformer serviceInf cache.SharedInformer podInf cache.SharedInformer + nodeInf cache.SharedInformer + withNodeMetadata bool podStore cache.Store endpointSliceStore cache.Store @@ -54,7 +56,7 @@ type EndpointSlice struct { } // NewEndpointSlice returns a new endpointslice discovery. 
-func NewEndpointSlice(l log.Logger, svc, eps, pod cache.SharedInformer) *EndpointSlice { +func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer) *EndpointSlice { if l == nil { l = log.NewNopLogger() } @@ -66,6 +68,8 @@ func NewEndpointSlice(l log.Logger, svc, eps, pod cache.SharedInformer) *Endpoin serviceStore: svc.GetStore(), podInf: pod, podStore: pod.GetStore(), + nodeInf: node, + withNodeMetadata: node != nil, queue: workqueue.NewNamed("endpointSlice"), } @@ -120,9 +124,38 @@ func NewEndpointSlice(l log.Logger, svc, eps, pod cache.SharedInformer) *Endpoin }, }) + if e.withNodeMetadata { + e.nodeInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(o interface{}) { + node := o.(*apiv1.Node) + e.enqueueNode(node.Name) + }, + UpdateFunc: func(_, o interface{}) { + node := o.(*apiv1.Node) + e.enqueueNode(node.Name) + }, + DeleteFunc: func(o interface{}) { + node := o.(*apiv1.Node) + e.enqueueNode(node.Name) + }, + }) + } + return e } +func (e *EndpointSlice) enqueueNode(nodeName string) { + endpoints, err := e.endpointSliceInf.GetIndexer().ByIndex(nodeIndex, nodeName) + if err != nil { + level.Error(e.logger).Log("msg", "Error getting endpoints for node", "node", nodeName, "err", err) + return + } + + for _, endpoint := range endpoints { + e.enqueue(endpoint) + } +} + func (e *EndpointSlice) enqueue(obj interface{}) { key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) if err != nil { @@ -136,7 +169,11 @@ func (e *EndpointSlice) enqueue(obj interface{}) { func (e *EndpointSlice) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { defer e.queue.ShutDown() - if !cache.WaitForCacheSync(ctx.Done(), e.endpointSliceInf.HasSynced, e.serviceInf.HasSynced, e.podInf.HasSynced) { + cacheSyncs := []cache.InformerSynced{e.endpointSliceInf.HasSynced, e.serviceInf.HasSynced, e.podInf.HasSynced} + if e.withNodeMetadata { + cacheSyncs = append(cacheSyncs, e.nodeInf.HasSynced) + } + if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) { if ctx.Err() != context.Canceled { level.Error(e.logger).Log("msg", "endpointslice informer unable to sync cache") } @@ -282,6 +319,10 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou target[model.LabelName(endpointSliceEndpointTopologyLabelPresentPrefix+ln)] = presentValue } + if e.withNodeMetadata { + target = addNodeLabels(target, e.nodeInf, e.logger, ep.nodename()) + } + pod := e.resolvePodRef(ep.targetRef()) if pod == nil { // This target is not a Pod, so don't continue with Pod specific logic. 
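Editor's note: the endpoints and endpointslice changes above wire node informers into the existing `attach_metadata` setting, which the docs change later in this diff extends beyond `role: pod`. A minimal sketch of a scrape config that would exercise the new code path (the job name is hypothetical; the schema follows the `attach_metadata` block documented in docs/configuration/configuration.md in this diff, and per those docs Prometheus must have permission to get Nodes):

```yaml
scrape_configs:
  - job_name: kubernetes-endpoints   # hypothetical job name
    kubernetes_sd_configs:
      - role: endpoints              # role: endpointslice is handled the same way
        attach_metadata:
          node: true                 # attach __meta_kubernetes_node_* labels to discovered targets
```

With `node: true`, discovered targets additionally carry `__meta_kubernetes_node_name`, `__meta_kubernetes_node_label_<name>`, and `__meta_kubernetes_node_labelpresent_<name>`, as exercised by TestEndpointsDiscoveryWithNodeMetadata above.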
diff --git a/discovery/kubernetes/endpointslice_adaptor.go b/discovery/kubernetes/endpointslice_adaptor.go index f22affb6f6..87484b06fd 100644 --- a/discovery/kubernetes/endpointslice_adaptor.go +++ b/discovery/kubernetes/endpointslice_adaptor.go @@ -41,6 +41,7 @@ type endpointSlicePortAdaptor interface { type endpointSliceEndpointAdaptor interface { addresses() []string hostname() *string + nodename() *string conditions() endpointSliceEndpointConditionsAdaptor targetRef() *corev1.ObjectReference topology() map[string]string @@ -164,6 +165,10 @@ func (e *endpointSliceEndpointAdaptorV1) hostname() *string { return e.endpoint.Hostname } +func (e *endpointSliceEndpointAdaptorV1) nodename() *string { + return e.endpoint.NodeName +} + func (e *endpointSliceEndpointAdaptorV1) conditions() endpointSliceEndpointConditionsAdaptor { return newEndpointSliceEndpointConditionsAdaptorFromV1(e.endpoint.Conditions) } @@ -204,6 +209,10 @@ func (e *endpointSliceEndpointAdaptorV1beta1) hostname() *string { return e.endpoint.Hostname } +func (e *endpointSliceEndpointAdaptorV1beta1) nodename() *string { + return e.endpoint.NodeName +} + func (e *endpointSliceEndpointAdaptorV1beta1) conditions() endpointSliceEndpointConditionsAdaptor { return newEndpointSliceEndpointConditionsAdaptorFromV1beta1(e.endpoint.Conditions) } diff --git a/discovery/kubernetes/endpointslice_test.go b/discovery/kubernetes/endpointslice_test.go index 16148d2a0f..91408c009c 100644 --- a/discovery/kubernetes/endpointslice_test.go +++ b/discovery/kubernetes/endpointslice_test.go @@ -68,6 +68,7 @@ func makeEndpointSliceV1() *v1.EndpointSlice { Conditions: v1.EndpointConditions{Ready: boolptr(true)}, Hostname: strptr("testendpoint1"), TargetRef: &corev1.ObjectReference{}, + NodeName: strptr("foobar"), DeprecatedTopology: map[string]string{ "topology": "value", }, @@ -688,6 +689,147 @@ func TestEndpointSliceDiscoveryWithServiceUpdate(t *testing.T) { }.Run(t) } +func TestEndpointsSlicesDiscoveryWithNodeMetadata(t *testing.T) { + metadataConfig := AttachMetadataConfig{Node: true} + nodeLabels := map[string]string{"az": "us-east1"} + svc := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testendpoints", + Namespace: "default", + Labels: map[string]string{ + "app/name": "test", + }, + }, + } + objs := []runtime.Object{makeEndpointSliceV1(), makeNode("foobar", "", "", nodeLabels, nil), svc} + n, _ := makeDiscoveryWithMetadata(RoleEndpointSlice, NamespaceDiscovery{}, metadataConfig, objs...) 
+ + k8sDiscoveryTest{ + discovery: n, + expectedMaxItems: 1, + expectedRes: map[string]*targetgroup.Group{ + "endpointslice/default/testendpoints": { + Targets: []model.LabelSet{ + { + "__address__": "1.2.3.4:9000", + "__meta_kubernetes_endpointslice_address_target_kind": "", + "__meta_kubernetes_endpointslice_address_target_name": "", + "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", + "__meta_kubernetes_endpointslice_endpoint_hostname": "testendpoint1", + "__meta_kubernetes_endpointslice_endpoint_topology_present_topology": "true", + "__meta_kubernetes_endpointslice_endpoint_topology_topology": "value", + "__meta_kubernetes_endpointslice_port": "9000", + "__meta_kubernetes_endpointslice_port_app_protocol": "http", + "__meta_kubernetes_endpointslice_port_name": "testport", + "__meta_kubernetes_endpointslice_port_protocol": "TCP", + "__meta_kubernetes_node_label_az": "us-east1", + "__meta_kubernetes_node_labelpresent_az": "true", + "__meta_kubernetes_node_name": "foobar", + }, + { + "__address__": "2.3.4.5:9000", + "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", + "__meta_kubernetes_endpointslice_port": "9000", + "__meta_kubernetes_endpointslice_port_app_protocol": "http", + "__meta_kubernetes_endpointslice_port_name": "testport", + "__meta_kubernetes_endpointslice_port_protocol": "TCP", + }, + { + "__address__": "3.4.5.6:9000", + "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "false", + "__meta_kubernetes_endpointslice_port": "9000", + "__meta_kubernetes_endpointslice_port_app_protocol": "http", + "__meta_kubernetes_endpointslice_port_name": "testport", + "__meta_kubernetes_endpointslice_port_protocol": "TCP", + }, + }, + Labels: model.LabelSet{ + "__meta_kubernetes_endpointslice_address_type": "IPv4", + "__meta_kubernetes_endpointslice_name": "testendpoints", + "__meta_kubernetes_namespace": "default", + "__meta_kubernetes_service_label_app_name": "test", + "__meta_kubernetes_service_labelpresent_app_name": "true", + "__meta_kubernetes_service_name": "testendpoints", + }, + Source: "endpointslice/default/testendpoints", + }, + }, + }.Run(t) +} + +func TestEndpointsSlicesDiscoveryWithUpdatedNodeMetadata(t *testing.T) { + metadataConfig := AttachMetadataConfig{Node: true} + nodeLabels := map[string]string{"az": "us-east1"} + svc := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testendpoints", + Namespace: "default", + Labels: map[string]string{ + "app/name": "test", + }, + }, + } + node := makeNode("foobar", "", "", nodeLabels, nil) + objs := []runtime.Object{makeEndpointSliceV1(), node, svc} + n, c := makeDiscoveryWithMetadata(RoleEndpointSlice, NamespaceDiscovery{}, metadataConfig, objs...) 
+ + k8sDiscoveryTest{ + discovery: n, + expectedMaxItems: 2, + afterStart: func() { + node.Labels["az"] = "us-central1" + c.CoreV1().Nodes().Update(context.Background(), node, metav1.UpdateOptions{}) + }, + expectedRes: map[string]*targetgroup.Group{ + "endpointslice/default/testendpoints": { + Targets: []model.LabelSet{ + { + "__address__": "1.2.3.4:9000", + "__meta_kubernetes_endpointslice_address_target_kind": "", + "__meta_kubernetes_endpointslice_address_target_name": "", + "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", + "__meta_kubernetes_endpointslice_endpoint_hostname": "testendpoint1", + "__meta_kubernetes_endpointslice_endpoint_topology_present_topology": "true", + "__meta_kubernetes_endpointslice_endpoint_topology_topology": "value", + "__meta_kubernetes_endpointslice_port": "9000", + "__meta_kubernetes_endpointslice_port_app_protocol": "http", + "__meta_kubernetes_endpointslice_port_name": "testport", + "__meta_kubernetes_endpointslice_port_protocol": "TCP", + "__meta_kubernetes_node_label_az": "us-central1", + "__meta_kubernetes_node_labelpresent_az": "true", + "__meta_kubernetes_node_name": "foobar", + }, + { + "__address__": "2.3.4.5:9000", + "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", + "__meta_kubernetes_endpointslice_port": "9000", + "__meta_kubernetes_endpointslice_port_app_protocol": "http", + "__meta_kubernetes_endpointslice_port_name": "testport", + "__meta_kubernetes_endpointslice_port_protocol": "TCP", + }, + { + "__address__": "3.4.5.6:9000", + "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "false", + "__meta_kubernetes_endpointslice_port": "9000", + "__meta_kubernetes_endpointslice_port_app_protocol": "http", + "__meta_kubernetes_endpointslice_port_name": "testport", + "__meta_kubernetes_endpointslice_port_protocol": "TCP", + }, + }, + Labels: model.LabelSet{ + "__meta_kubernetes_endpointslice_address_type": "IPv4", + "__meta_kubernetes_endpointslice_name": "testendpoints", + "__meta_kubernetes_namespace": "default", + "__meta_kubernetes_service_label_app_name": "test", + "__meta_kubernetes_service_labelpresent_app_name": "true", + "__meta_kubernetes_service_name": "testendpoints", + }, + Source: "endpointslice/default/testendpoints", + }, + }, + }.Run(t) +} + func TestEndpointSliceDiscoveryNamespaces(t *testing.T) { epOne := makeEndpointSliceV1() epOne.Namespace = "ns1" diff --git a/discovery/kubernetes/kubernetes.go b/discovery/kubernetes/kubernetes.go index 3f417c49b3..2d1e36d57d 100644 --- a/discovery/kubernetes/kubernetes.go +++ b/discovery/kubernetes/kubernetes.go @@ -23,6 +23,8 @@ import ( "sync" "time" + disv1beta1 "k8s.io/api/discovery/v1beta1" + "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" @@ -31,7 +33,6 @@ import ( "github.com/prometheus/common/version" apiv1 "k8s.io/api/core/v1" disv1 "k8s.io/api/discovery/v1" - disv1beta1 "k8s.io/api/discovery/v1beta1" networkv1 "k8s.io/api/networking/v1" "k8s.io/api/networking/v1beta1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -406,7 +407,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { } for _, namespace := range namespaces { - var informer cache.SharedInformer + var informer cache.SharedIndexInformer if v1Supported { e := d.client.DiscoveryV1().EndpointSlices(namespace) elw := &cache.ListWatch{ @@ -421,7 +422,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { return e.Watch(ctx, options) }, } - informer = 
cache.NewSharedInformer(elw, &disv1.EndpointSlice{}, resyncPeriod) + informer = d.newEndpointSlicesByNodeInformer(elw, &disv1.EndpointSlice{}) } else { e := d.client.DiscoveryV1beta1().EndpointSlices(namespace) elw := &cache.ListWatch{ @@ -436,7 +437,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { return e.Watch(ctx, options) }, } - informer = cache.NewSharedInformer(elw, &disv1beta1.EndpointSlice{}, resyncPeriod) + informer = d.newEndpointSlicesByNodeInformer(elw, &disv1beta1.EndpointSlice{}) } s := d.client.CoreV1().Services(namespace) @@ -465,11 +466,17 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { return p.Watch(ctx, options) }, } + var nodeInf cache.SharedInformer + if d.attachMetadata.Node { + nodeInf = d.newNodeInformer(context.Background()) + go nodeInf.Run(ctx.Done()) + } eps := NewEndpointSlice( log.With(d.logger, "role", "endpointslice"), - cache.NewSharedInformer(slw, &apiv1.Service{}, resyncPeriod), informer, + cache.NewSharedInformer(slw, &apiv1.Service{}, resyncPeriod), cache.NewSharedInformer(plw, &apiv1.Pod{}, resyncPeriod), + nodeInf, ) d.discoverers = append(d.discoverers, eps) go eps.endpointSliceInf.Run(ctx.Done()) @@ -517,11 +524,18 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { return p.Watch(ctx, options) }, } + var nodeInf cache.SharedInformer + if d.attachMetadata.Node { + nodeInf = d.newNodeInformer(ctx) + go nodeInf.Run(ctx.Done()) + } + eps := NewEndpoints( log.With(d.logger, "role", "endpoint"), + d.newEndpointsByNodeInformer(elw), cache.NewSharedInformer(slw, &apiv1.Service{}, resyncPeriod), - cache.NewSharedInformer(elw, &apiv1.Endpoints{}, resyncPeriod), cache.NewSharedInformer(plw, &apiv1.Pod{}, resyncPeriod), + nodeInf, ) d.discoverers = append(d.discoverers, eps) go eps.endpointsInf.Run(ctx.Done()) @@ -735,6 +749,65 @@ func (d *Discovery) newPodsByNodeInformer(plw *cache.ListWatch) cache.SharedInde return cache.NewSharedIndexInformer(plw, &apiv1.Pod{}, resyncPeriod, indexers) } +func (d *Discovery) newEndpointsByNodeInformer(plw *cache.ListWatch) cache.SharedIndexInformer { + indexers := make(map[string]cache.IndexFunc) + if !d.attachMetadata.Node { + return cache.NewSharedIndexInformer(plw, &apiv1.Endpoints{}, resyncPeriod, indexers) + } + + indexers[nodeIndex] = func(obj interface{}) ([]string, error) { + e, ok := obj.(*apiv1.Endpoints) + if !ok { + return nil, fmt.Errorf("object is not a pod") + } + var nodes []string + for _, target := range e.Subsets { + for _, addr := range target.Addresses { + if addr.NodeName == nil { + continue + } + nodes = append(nodes, *addr.NodeName) + } + } + return nodes, nil + } + + return cache.NewSharedIndexInformer(plw, &apiv1.Endpoints{}, resyncPeriod, indexers) +} + +func (d *Discovery) newEndpointSlicesByNodeInformer(plw *cache.ListWatch, object runtime.Object) cache.SharedIndexInformer { + indexers := make(map[string]cache.IndexFunc) + if !d.attachMetadata.Node { + cache.NewSharedIndexInformer(plw, &disv1.EndpointSlice{}, resyncPeriod, indexers) + } + + indexers[nodeIndex] = func(obj interface{}) ([]string, error) { + var nodes []string + switch e := obj.(type) { + case *disv1.EndpointSlice: + for _, target := range e.Endpoints { + if target.NodeName == nil { + continue + } + nodes = append(nodes, *target.NodeName) + } + case *disv1beta1.EndpointSlice: + for _, target := range e.Endpoints { + if target.NodeName == nil { + continue + } + nodes = append(nodes, *target.NodeName) + } + default: + return nil, 
fmt.Errorf("object is not an endpointslice") + } + + return nodes, nil + } + + return cache.NewSharedIndexInformer(plw, object, resyncPeriod, indexers) +} + func checkDiscoveryV1Supported(client kubernetes.Interface) (bool, error) { k8sVer, err := client.Discovery().ServerVersion() if err != nil { diff --git a/discovery/kubernetes/pod.go b/discovery/kubernetes/pod.go index 10ec4512a2..2e55dce789 100644 --- a/discovery/kubernetes/pod.go +++ b/discovery/kubernetes/pod.go @@ -253,7 +253,7 @@ func (p *Pod) buildPod(pod *apiv1.Pod) *targetgroup.Group { tg.Labels = podLabels(pod) tg.Labels[namespaceLabel] = lv(pod.Namespace) if p.withNodeMetadata { - p.attachNodeMetadata(tg, pod) + tg.Labels = addNodeLabels(tg.Labels, p.nodeInf, p.logger, &pod.Spec.NodeName) } containers := append(pod.Spec.Containers, pod.Spec.InitContainers...) @@ -291,27 +291,6 @@ func (p *Pod) buildPod(pod *apiv1.Pod) *targetgroup.Group { return tg } -func (p *Pod) attachNodeMetadata(tg *targetgroup.Group, pod *apiv1.Pod) { - tg.Labels[nodeNameLabel] = lv(pod.Spec.NodeName) - - obj, exists, err := p.nodeInf.GetStore().GetByKey(pod.Spec.NodeName) - if err != nil { - level.Error(p.logger).Log("msg", "Error getting node", "node", pod.Spec.NodeName, "err", err) - return - } - - if !exists { - return - } - - node := obj.(*apiv1.Node) - for k, v := range node.GetLabels() { - ln := strutil.SanitizeLabelName(k) - tg.Labels[model.LabelName(nodeLabelPrefix+ln)] = lv(v) - tg.Labels[model.LabelName(nodeLabelPresentPrefix+ln)] = presentValue - } -} - func (p *Pod) enqueuePodsForNode(nodeName string) { pods, err := p.podInf.GetIndexer().ByIndex(nodeIndex, nodeName) if err != nil { diff --git a/discovery/nomad/nomad.go b/discovery/nomad/nomad.go new file mode 100644 index 0000000000..c8d5130396 --- /dev/null +++ b/discovery/nomad/nomad.go @@ -0,0 +1,210 @@ +// Copyright 2022 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package nomad + +import ( + "context" + "errors" + "fmt" + "net" + "strconv" + "strings" + "time" + + "github.com/go-kit/log" + nomad "github.com/hashicorp/nomad/api" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/config" + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/discovery" + "github.com/prometheus/prometheus/discovery/refresh" + "github.com/prometheus/prometheus/discovery/targetgroup" +) + +const ( + // nomadLabel is the name for the label containing a target. + nomadLabel = model.MetaLabelPrefix + "nomad_" + // serviceLabel is the name of the label containing the service name. + nomadAddress = nomadLabel + "address" + nomadService = nomadLabel + "service" + nomadNamespace = nomadLabel + "namespace" + nomadNodeID = nomadLabel + "node_id" + nomadDatacenter = nomadLabel + "dc" + nomadServiceAddress = nomadService + "_address" + nomadServicePort = nomadService + "_port" + nomadServiceID = nomadService + "_id" + nomadTags = nomadLabel + "tags" +) + +// DefaultSDConfig is the default nomad SD configuration. 
+var ( + DefaultSDConfig = SDConfig{ + AllowStale: true, + HTTPClientConfig: config.DefaultHTTPClientConfig, + Namespace: "default", + RefreshInterval: model.Duration(60 * time.Second), + Region: "global", + Server: "http://localhost:4646", + TagSeparator: ",", + } + + failuresCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_sd_nomad_failures_total", + Help: "Number of nomad service discovery refresh failures.", + }) +) + +func init() { + discovery.RegisterConfig(&SDConfig{}) + prometheus.MustRegister(failuresCount) +} + +// SDConfig is the configuration for nomad based service discovery. +type SDConfig struct { + AllowStale bool `yaml:"allow_stale"` + HTTPClientConfig config.HTTPClientConfig `yaml:",inline"` + Namespace string `yaml:"namespace"` + RefreshInterval model.Duration `yaml:"refresh_interval"` + Region string `yaml:"region"` + Server string `yaml:"server"` + TagSeparator string `yaml:"tag_separator,omitempty"` +} + +// Name returns the name of the Config. +func (*SDConfig) Name() string { return "nomad" } + +// NewDiscoverer returns a Discoverer for the Config. +func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { + return NewDiscovery(c, opts.Logger) +} + +// SetDirectory joins any relative file paths with dir. +func (c *SDConfig) SetDirectory(dir string) { + c.HTTPClientConfig.SetDirectory(dir) +} + +// UnmarshalYAML implements the yaml.Unmarshaler interface. +func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { + *c = DefaultSDConfig + type plain SDConfig + err := unmarshal((*plain)(c)) + if err != nil { + return err + } + if strings.TrimSpace(c.Server) == "" { + return errors.New("nomad SD configuration requires a server address") + } + return c.HTTPClientConfig.Validate() +} + +// Discovery periodically performs nomad requests. It implements +// the Discoverer interface. +type Discovery struct { + *refresh.Discovery + allowStale bool + client *nomad.Client + namespace string + refreshInterval time.Duration + region string + server string + tagSeparator string +} + +// NewDiscovery returns a new Discovery which periodically refreshes its targets. 
+func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { + d := &Discovery{ + allowStale: conf.AllowStale, + namespace: conf.Namespace, + refreshInterval: time.Duration(conf.RefreshInterval), + region: conf.Region, + server: conf.Server, + tagSeparator: conf.TagSeparator, + } + + HTTPClient, err := config.NewClientFromConfig(conf.HTTPClientConfig, "nomad_sd") + if err != nil { + return nil, err + } + + config := nomad.Config{ + Address: conf.Server, + HttpClient: HTTPClient, + Namespace: conf.Namespace, + Region: conf.Region, + } + + client, err := nomad.NewClient(&config) + if err != nil { + return nil, err + } + d.client = client + + d.Discovery = refresh.NewDiscovery( + logger, + "nomad", + time.Duration(conf.RefreshInterval), + d.refresh, + ) + return d, nil +} + +func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { + opts := &nomad.QueryOptions{ + AllowStale: d.allowStale, + } + stubs, _, err := d.client.Services().List(opts) + if err != nil { + failuresCount.Inc() + return nil, err + } + + tg := &targetgroup.Group{ + Source: "Nomad", + } + + for _, stub := range stubs { + for _, service := range stub.Services { + instances, _, err := d.client.Services().Get(service.ServiceName, opts) + if err != nil { + failuresCount.Inc() + return nil, fmt.Errorf("failed to fetch services: %w", err) + } + + for _, instance := range instances { + labels := model.LabelSet{ + nomadAddress: model.LabelValue(instance.Address), + nomadDatacenter: model.LabelValue(instance.Datacenter), + nomadNodeID: model.LabelValue(instance.NodeID), + nomadNamespace: model.LabelValue(instance.Namespace), + nomadServiceAddress: model.LabelValue(instance.Address), + nomadServiceID: model.LabelValue(instance.ID), + nomadServicePort: model.LabelValue(strconv.Itoa(instance.Port)), + nomadService: model.LabelValue(instance.ServiceName), + } + addr := net.JoinHostPort(instance.Address, strconv.FormatInt(int64(instance.Port), 10)) + labels[model.AddressLabel] = model.LabelValue(addr) + + if len(instance.Tags) > 0 { + tags := d.tagSeparator + strings.Join(instance.Tags, d.tagSeparator) + d.tagSeparator + labels[nomadTags] = model.LabelValue(tags) + } + + tg.Targets = append(tg.Targets, labels) + } + } + } + return []*targetgroup.Group{tg}, nil +} diff --git a/discovery/nomad/nomad_test.go b/discovery/nomad/nomad_test.go new file mode 100644 index 0000000000..40d412d74f --- /dev/null +++ b/discovery/nomad/nomad_test.go @@ -0,0 +1,170 @@ +// Copyright 2022 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package nomad + +import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "net/url" + "testing" + + "github.com/go-kit/log" + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" +) + +type NomadSDTestSuite struct { + Mock *SDMock +} + +// SDMock is the interface for the nomad mock +type SDMock struct { + t *testing.T + Server *httptest.Server + Mux *http.ServeMux +} + +// NewSDMock returns a new SDMock. 
+func NewSDMock(t *testing.T) *SDMock { + return &SDMock{ + t: t, + } +} + +// Endpoint returns the URI to the mock server. +func (m *SDMock) Endpoint() string { + return m.Server.URL + "/" +} + +// Setup creates the mock server. +func (m *SDMock) Setup() { + m.Mux = http.NewServeMux() + m.Server = httptest.NewServer(m.Mux) +} + +// ShutdownServer creates the mock server. +func (m *SDMock) ShutdownServer() { + m.Server.Close() +} + +func (s *NomadSDTestSuite) TearDownSuite() { + s.Mock.ShutdownServer() +} + +func (s *NomadSDTestSuite) SetupTest(t *testing.T) { + s.Mock = NewSDMock(t) + s.Mock.Setup() + + s.Mock.HandleServicesList() + s.Mock.HandleServiceHashiCupsGet() +} + +func (m *SDMock) HandleServicesList() { + m.Mux.HandleFunc("/v1/services", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("content-type", "application/json; charset=utf-8") + w.WriteHeader(http.StatusOK) + + fmt.Fprint(w, ` + [ + { + "Namespace": "default", + "Services": [ + { + "ServiceName": "hashicups", + "Tags": [ + "metrics" + ] + } + ] + } + ]`, + ) + }) +} + +func (m *SDMock) HandleServiceHashiCupsGet() { + m.Mux.HandleFunc("/v1/service/hashicups", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("content-type", "application/json; charset=utf-8") + w.WriteHeader(http.StatusOK) + + fmt.Fprint(w, ` + [ + { + "ID": "_nomad-task-6a1d5f0a-7362-3f5d-9baf-5ed438918e50-group-hashicups-hashicups-hashicups_ui", + "ServiceName": "hashicups", + "Namespace": "default", + "NodeID": "d92fdc3c-9c2b-298a-e8f4-c33f3a449f09", + "Datacenter": "dc1", + "JobID": "dashboard", + "AllocID": "6a1d5f0a-7362-3f5d-9baf-5ed438918e50", + "Tags": [ + "metrics" + ], + "Address": "127.0.0.1", + "Port": 30456, + "CreateIndex": 226, + "ModifyIndex": 226 + } + ]`, + ) + }) +} + +func TestConfiguredService(t *testing.T) { + conf := &SDConfig{ + Server: "http://localhost:4646", + } + _, err := NewDiscovery(conf, nil) + require.NoError(t, err) +} + +func TestNomadSDRefresh(t *testing.T) { + sdmock := &NomadSDTestSuite{} + sdmock.SetupTest(t) + t.Cleanup(sdmock.TearDownSuite) + + endpoint, err := url.Parse(sdmock.Mock.Endpoint()) + require.NoError(t, err) + + cfg := DefaultSDConfig + cfg.Server = endpoint.String() + d, err := NewDiscovery(&cfg, log.NewNopLogger()) + require.NoError(t, err) + + tgs, err := d.refresh(context.Background()) + require.NoError(t, err) + + require.Equal(t, 1, len(tgs)) + + tg := tgs[0] + require.NotNil(t, tg) + require.NotNil(t, tg.Targets) + require.Equal(t, 1, len(tg.Targets)) + + lbls := model.LabelSet{ + "__address__": model.LabelValue("127.0.0.1:30456"), + "__meta_nomad_address": model.LabelValue("127.0.0.1"), + "__meta_nomad_dc": model.LabelValue("dc1"), + "__meta_nomad_namespace": model.LabelValue("default"), + "__meta_nomad_node_id": model.LabelValue("d92fdc3c-9c2b-298a-e8f4-c33f3a449f09"), + "__meta_nomad_service": model.LabelValue("hashicups"), + "__meta_nomad_service_address": model.LabelValue("127.0.0.1"), + "__meta_nomad_service_id": model.LabelValue("_nomad-task-6a1d5f0a-7362-3f5d-9baf-5ed438918e50-group-hashicups-hashicups-hashicups_ui"), + "__meta_nomad_service_port": model.LabelValue("30456"), + "__meta_nomad_tags": model.LabelValue(",metrics,"), + } + require.Equal(t, lbls, tg.Targets[0]) +} diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index b1d5915822..c525109fdd 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -285,6 +285,10 @@ marathon_sd_configs: nerve_sd_configs: [ - ... 
] +# List of Nomad service discovery configurations. +nomad_sd_configs: + [ - ... ] + # List of OpenStack service discovery configurations. openstack_sd_configs: [ - ... ] @@ -1686,7 +1690,7 @@ Available meta labels: * `__meta_kubernetes_pod_name`: The name of the pod object. * `__meta_kubernetes_pod_ip`: The pod IP of the pod object. * `__meta_kubernetes_pod_label_`: Each label from the pod object. -* `__meta_kubernetes_pod_labelpresent_`: `true`for each label from the pod object. +* `__meta_kubernetes_pod_labelpresent_`: `true` for each label from the pod object. * `__meta_kubernetes_pod_annotation_`: Each annotation from the pod object. * `__meta_kubernetes_pod_annotationpresent_`: `true` for each annotation from the pod object. * `__meta_kubernetes_pod_container_init`: `true` if the container is an [InitContainer](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/) @@ -1713,6 +1717,8 @@ Available meta labels: * `__meta_kubernetes_namespace`: The namespace of the endpoints object. * `__meta_kubernetes_endpoints_name`: The names of the endpoints object. +* `__meta_kubernetes_endpoints_label_`: Each label from the endpoints object. +* `__meta_kubernetes_endpoints_labelpresent_`: `true` for each label from the endpoints object. * For all targets discovered directly from the endpoints list (those not additionally inferred from underlying pods), the following labels are attached: * `__meta_kubernetes_endpoint_hostname`: Hostname of the endpoint. @@ -1851,7 +1857,7 @@ namespaces: # Optional metadata to attach to discovered targets. If omitted, no additional metadata is attached. attach_metadata: -# Attaches node metadata to discovered targets. Only valid for role: pod. +# Attaches node metadata to discovered targets. Valid for roles: pod, endpoints, endpointslice. # When set to true, Prometheus must have permissions to get Nodes. [ node: | default = false ] ``` @@ -2174,6 +2180,73 @@ paths: - [ timeout: | default = 10s ] ``` +### `` + +Nomad SD configurations allow retrieving scrape targets from [Nomad's](https://www.nomadproject.io/) +Service API. + +The following meta labels are available on targets during [relabeling](#relabel_config): + +* `__meta_nomad_address`: the service address of the target +* `__meta_nomad_dc`: the datacenter name for the target +* `__meta_nomad_namespace`: the namespace of the target +* `__meta_nomad_node_id`: the node name defined for the target +* `__meta_nomad_service`: the name of the service the target belongs to +* `__meta_nomad_service_address`: the service address of the target +* `__meta_nomad_service_id`: the service ID of the target +* `__meta_nomad_service_port`: the service port of the target +* `__meta_nomad_tags`: the list of tags of the target joined by the tag separator + +```yaml +# The information to access the Nomad API. It is to be defined +# as the Nomad documentation requires. +[ allow_stale: | default = true ] +[ datacenter: ] +[ namespace: | default = default ] +[ refresh_interval: | default = 60s ] +[ region: | default = global ] +[ server: ] +[ tag_separator: | default = ,] + +# Authentication information used to authenticate to the nomad server. +# Note that `basic_auth`, `authorization` and `oauth2` options are +# mutually exclusive. +# `password` and `password_file` are mutually exclusive. + +# Optional HTTP basic authentication information. +basic_auth: + [ username: ] + [ password: ] + [ password_file: ] + +# Optional `Authorization` header configuration. +authorization: + # Sets the authentication type. 
+ [ type: | default: Bearer ] + # Sets the credentials. It is mutually exclusive with + # `credentials_file`. + [ credentials: ] + # Sets the credentials to the credentials read from the configured file. + # It is mutually exclusive with `credentials`. + [ credentials_file: ] + +# Optional OAuth 2.0 configuration. +oauth2: + [ ] + +# Optional proxy URL. +[ proxy_url: ] + +# Configure whether HTTP requests follow HTTP 3xx redirects. +[ follow_redirects: | default = true ] + +# Whether to enable HTTP2. +[ enable_http2: | default: true ] + +# TLS configuration. +tls_config: + [ ] +``` ### `` @@ -2866,6 +2939,10 @@ marathon_sd_configs: nerve_sd_configs: [ - ... ] +# List of Nomad service discovery configurations. +nomad_sd_configs: + [ - ... ] + # List of OpenStack service discovery configurations. openstack_sd_configs: [ - ... ] diff --git a/docs/configuration/https.md b/docs/configuration/https.md index c060ec4288..ad142e4a56 100644 --- a/docs/configuration/https.md +++ b/docs/configuration/https.md @@ -54,6 +54,9 @@ tls_server_config: # Go default cipher suites are used. Available cipher suites are documented # in the go documentation: # https://golang.org/pkg/crypto/tls/#pkg-constants + # + # Note that only the cipher returned by the following function are supported: + # https://pkg.go.dev/crypto/tls#CipherSuites [ cipher_suites: [ - ] ] diff --git a/docs/querying/operators.md b/docs/querying/operators.md index 9f35e0c153..9c1249b464 100644 --- a/docs/querying/operators.md +++ b/docs/querying/operators.md @@ -114,6 +114,37 @@ Operations between vectors attempt to find a matching element in the right-hand vector for each entry in the left-hand side. There are two basic types of matching behavior: One-to-one and many-to-one/one-to-many. +### Vector matching keywords + +These vector matching keywords allow for matching between series with different label sets +providing: + +* `on` +* `ignoring` + +Label lists provided to matching keywords will determine how vectors are combined. Examples +can be found in [One-to-one vector matches](#one-to-one-vector-matches) and in +[Many-to-one and one-to-many vector matches](#many-to-one-and-one-to-many-vector-matches) + +### Group modifiers + +These group modifiers enable many-to-one/one-to-many vector matching: + +* `group_left` +* `group_right` + +Label lists can be provided to the group modifier which contain labels from the "one"-side to +be included in the result metrics. + +_Many-to-one and one-to-many matching are advanced use cases that should be carefully considered. +Often a proper use of `ignoring()` provides the desired outcome._ + +_Grouping modifiers can only be used for +[comparison](#comparison-binary-operators) and +[arithmetic](#arithmetic-binary-operators). Operations as `and`, `unless` and +`or` operations match with all possible entries in the right vector by +default._ + ### One-to-one vector matches **One-to-one** finds a unique pair of entries from each side of the operation. @@ -153,7 +184,7 @@ The entries with methods `put` and `del` have no match and will not show up in t **Many-to-one** and **one-to-many** matchings refer to the case where each vector element on the "one"-side can match with multiple elements on the "many"-side. This has to -be explicitly requested using the `group_left` or `group_right` modifier, where +be explicitly requested using the `group_left` or `group_right` [modifiers](#group-modifiers), where left/right determines which vector has the higher cardinality. ignoring(