2017-11-25 05:13:54 -08:00
|
|
|
// Copyright 2013 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2018-02-01 01:55:07 -08:00
|
|
|
package scrape
|
2017-11-25 05:13:54 -08:00
|
|
|
|
|
|
|
import (
|
2019-03-12 03:46:15 -07:00
|
|
|
"encoding"
|
2019-02-13 05:24:22 -08:00
|
|
|
"fmt"
|
2019-03-12 03:46:15 -07:00
|
|
|
"hash/fnv"
|
|
|
|
"net"
|
|
|
|
"os"
|
2018-01-18 03:49:42 -08:00
|
|
|
"reflect"
|
2018-01-17 03:46:17 -08:00
|
|
|
"sync"
|
2018-09-26 02:20:56 -07:00
|
|
|
"time"
|
2017-11-25 05:13:54 -08:00
|
|
|
|
|
|
|
"github.com/go-kit/kit/log"
|
|
|
|
"github.com/go-kit/kit/log/level"
|
2019-03-25 16:01:12 -07:00
|
|
|
"github.com/pkg/errors"
|
2017-11-25 05:13:54 -08:00
|
|
|
|
|
|
|
"github.com/prometheus/prometheus/config"
|
Refactor SD configuration to remove `config` dependency (#3629)
* refactor: move targetGroup struct and CheckOverflow() to their own package
* refactor: move auth and security related structs to a utility package, fix import error in utility package
* refactor: Azure SD, remove SD struct from config
* refactor: DNS SD, remove SD struct from config into dns package
* refactor: ec2 SD, move SD struct from config into the ec2 package
* refactor: file SD, move SD struct from config to file discovery package
* refactor: gce, move SD struct from config to gce discovery package
* refactor: move HTTPClientConfig and URL into util/config, fix import error in httputil
* refactor: consul, move SD struct from config into consul discovery package
* refactor: marathon, move SD struct from config into marathon discovery package
* refactor: triton, move SD struct from config to triton discovery package, fix test
* refactor: zookeeper, move SD structs from config to zookeeper discovery package
* refactor: openstack, remove SD struct from config, move into openstack discovery package
* refactor: kubernetes, move SD struct from config into kubernetes discovery package
* refactor: notifier, use targetgroup package instead of config
* refactor: tests for file, marathon, triton SD - use targetgroup package instead of config.TargetGroup
* refactor: retrieval, use targetgroup package instead of config.TargetGroup
* refactor: storage, use config util package
* refactor: discovery manager, use targetgroup package instead of config.TargetGroup
* refactor: use HTTPClient and TLS config from configUtil instead of config
* refactor: tests, use targetgroup package instead of config.TargetGroup
* refactor: fix tagetgroup.Group pointers that were removed by mistake
* refactor: openstack, kubernetes: drop prefixes
* refactor: remove import aliases forced due to vscode bug
* refactor: move main SD struct out of config into discovery/config
* refactor: rename configUtil to config_util
* refactor: rename yamlUtil to yaml_config
* refactor: kubernetes, remove prefixes
* refactor: move the TargetGroup package to discovery/
* refactor: fix order of imports
2017-12-29 12:01:34 -08:00
|
|
|
"github.com/prometheus/prometheus/discovery/targetgroup"
|
2019-03-13 03:02:36 -07:00
|
|
|
"github.com/prometheus/prometheus/pkg/labels"
|
2017-11-25 05:13:54 -08:00
|
|
|
"github.com/prometheus/prometheus/storage"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Appendable returns an Appender.
|
|
|
|
type Appendable interface {
|
|
|
|
Appender() (storage.Appender, error)
|
|
|
|
}
|
|
|
|
|
2018-02-01 02:06:24 -08:00
|
|
|
// NewManager is the Manager constructor
|
|
|
|
func NewManager(logger log.Logger, app Appendable) *Manager {
|
2018-09-26 02:20:56 -07:00
|
|
|
if logger == nil {
|
|
|
|
logger = log.NewNopLogger()
|
|
|
|
}
|
2018-02-01 02:06:24 -08:00
|
|
|
return &Manager{
|
2017-11-25 05:13:54 -08:00
|
|
|
append: app,
|
|
|
|
logger: logger,
|
|
|
|
scrapeConfigs: make(map[string]*config.ScrapeConfig),
|
|
|
|
scrapePools: make(map[string]*scrapePool),
|
2017-11-26 07:15:15 -08:00
|
|
|
graceShut: make(chan struct{}),
|
2018-09-26 02:20:56 -07:00
|
|
|
triggerReload: make(chan struct{}, 1),
|
2017-11-25 05:13:54 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-01 02:06:24 -08:00
|
|
|
// Manager maintains a set of scrape pools and manages start/stop cycles
|
2017-11-25 05:13:54 -08:00
|
|
|
// when receiving new target groups form the discovery manager.
|
2018-02-01 02:06:24 -08:00
|
|
|
type Manager struct {
|
2018-04-09 07:18:25 -07:00
|
|
|
logger log.Logger
|
|
|
|
append Appendable
|
|
|
|
graceShut chan struct{}
|
|
|
|
|
2019-03-12 03:46:15 -07:00
|
|
|
jitterSeed uint64 // Global jitterSeed seed is used to spread scrape workload across HA setup.
|
2018-04-09 07:18:25 -07:00
|
|
|
mtxScrape sync.Mutex // Guards the fields below.
|
2017-11-25 05:13:54 -08:00
|
|
|
scrapeConfigs map[string]*config.ScrapeConfig
|
|
|
|
scrapePools map[string]*scrapePool
|
2018-09-26 02:20:56 -07:00
|
|
|
targetSets map[string][]*targetgroup.Group
|
|
|
|
|
|
|
|
triggerReload chan struct{}
|
2017-11-25 05:13:54 -08:00
|
|
|
}
|
|
|
|
|
2018-09-26 02:20:56 -07:00
|
|
|
// Run receives and saves target set updates and triggers the scraping loops reloading.
|
|
|
|
// Reloading happens in the background so that it doesn't block receiving targets updates.
|
2018-02-01 02:06:24 -08:00
|
|
|
func (m *Manager) Run(tsets <-chan map[string][]*targetgroup.Group) error {
|
2018-09-26 02:20:56 -07:00
|
|
|
go m.reloader()
|
2017-11-25 05:13:54 -08:00
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case ts := <-tsets:
|
2018-09-26 02:20:56 -07:00
|
|
|
m.updateTsets(ts)
|
|
|
|
|
|
|
|
select {
|
|
|
|
case m.triggerReload <- struct{}{}:
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
2017-11-26 07:15:15 -08:00
|
|
|
case <-m.graceShut:
|
|
|
|
return nil
|
2017-11-25 05:13:54 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-26 02:20:56 -07:00
|
|
|
func (m *Manager) reloader() {
|
|
|
|
ticker := time.NewTicker(5 * time.Second)
|
|
|
|
defer ticker.Stop()
|
|
|
|
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-m.graceShut:
|
|
|
|
return
|
|
|
|
case <-ticker.C:
|
|
|
|
select {
|
|
|
|
case <-m.triggerReload:
|
|
|
|
m.reload()
|
|
|
|
case <-m.graceShut:
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m *Manager) reload() {
|
|
|
|
m.mtxScrape.Lock()
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
for setName, groups := range m.targetSets {
|
2019-02-13 05:24:22 -08:00
|
|
|
if _, ok := m.scrapePools[setName]; !ok {
|
2018-09-26 02:20:56 -07:00
|
|
|
scrapeConfig, ok := m.scrapeConfigs[setName]
|
|
|
|
if !ok {
|
|
|
|
level.Error(m.logger).Log("msg", "error reloading target set", "err", "invalid config id:"+setName)
|
2018-11-23 01:23:55 -08:00
|
|
|
continue
|
2018-09-26 02:20:56 -07:00
|
|
|
}
|
2019-03-12 03:46:15 -07:00
|
|
|
sp, err := newScrapePool(scrapeConfig, m.append, m.jitterSeed, log.With(m.logger, "scrape_pool", setName))
|
2019-02-13 05:24:22 -08:00
|
|
|
if err != nil {
|
|
|
|
level.Error(m.logger).Log("msg", "error creating new scrape pool", "err", err, "scrape_pool", setName)
|
|
|
|
continue
|
|
|
|
}
|
2018-09-26 02:20:56 -07:00
|
|
|
m.scrapePools[setName] = sp
|
|
|
|
}
|
|
|
|
|
|
|
|
wg.Add(1)
|
|
|
|
// Run the sync in parallel as these take a while and at high load can't catch up.
|
|
|
|
go func(sp *scrapePool, groups []*targetgroup.Group) {
|
|
|
|
sp.Sync(groups)
|
|
|
|
wg.Done()
|
2019-02-13 05:24:22 -08:00
|
|
|
}(m.scrapePools[setName], groups)
|
2018-09-26 02:20:56 -07:00
|
|
|
|
|
|
|
}
|
|
|
|
m.mtxScrape.Unlock()
|
|
|
|
wg.Wait()
|
|
|
|
}
|
|
|
|
|
2019-03-12 03:46:15 -07:00
|
|
|
// setJitterSeed calculates a global jitterSeed per server relying on extra label set.
|
2019-03-13 03:02:36 -07:00
|
|
|
func (m *Manager) setJitterSeed(labels labels.Labels) error {
|
2019-03-12 03:46:15 -07:00
|
|
|
h := fnv.New64a()
|
|
|
|
hostname, err := getFqdn()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if _, err := fmt.Fprintf(h, "%s%s", hostname, labels.String()); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
m.jitterSeed = h.Sum64()
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2017-11-26 07:15:15 -08:00
|
|
|
// Stop cancels all running scrape pools and blocks until all have exited.
|
2018-02-01 02:06:24 -08:00
|
|
|
func (m *Manager) Stop() {
|
2018-04-09 07:18:25 -07:00
|
|
|
m.mtxScrape.Lock()
|
|
|
|
defer m.mtxScrape.Unlock()
|
|
|
|
|
2017-11-26 07:15:15 -08:00
|
|
|
for _, sp := range m.scrapePools {
|
|
|
|
sp.stop()
|
|
|
|
}
|
|
|
|
close(m.graceShut)
|
|
|
|
}
|
|
|
|
|
2018-09-26 02:20:56 -07:00
|
|
|
func (m *Manager) updateTsets(tsets map[string][]*targetgroup.Group) {
|
|
|
|
m.mtxScrape.Lock()
|
|
|
|
m.targetSets = tsets
|
|
|
|
m.mtxScrape.Unlock()
|
|
|
|
}
|
|
|
|
|
2017-11-25 05:13:54 -08:00
|
|
|
// ApplyConfig resets the manager's target providers and job configurations as defined by the new cfg.
|
2018-02-01 02:06:24 -08:00
|
|
|
func (m *Manager) ApplyConfig(cfg *config.Config) error {
|
2018-04-09 07:18:25 -07:00
|
|
|
m.mtxScrape.Lock()
|
|
|
|
defer m.mtxScrape.Unlock()
|
|
|
|
|
2018-01-17 03:46:17 -08:00
|
|
|
c := make(map[string]*config.ScrapeConfig)
|
|
|
|
for _, scfg := range cfg.ScrapeConfigs {
|
|
|
|
c[scfg.JobName] = scfg
|
2017-11-25 05:13:54 -08:00
|
|
|
}
|
2018-01-17 03:46:17 -08:00
|
|
|
m.scrapeConfigs = c
|
2018-01-18 03:49:42 -08:00
|
|
|
|
2019-03-12 03:46:15 -07:00
|
|
|
if err := m.setJitterSeed(cfg.GlobalConfig.ExternalLabels); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2019-02-13 05:24:22 -08:00
|
|
|
// Cleanup and reload pool if the configuration has changed.
|
|
|
|
var failed bool
|
2018-01-18 03:49:42 -08:00
|
|
|
for name, sp := range m.scrapePools {
|
|
|
|
if cfg, ok := m.scrapeConfigs[name]; !ok {
|
|
|
|
sp.stop()
|
|
|
|
delete(m.scrapePools, name)
|
|
|
|
} else if !reflect.DeepEqual(sp.config, cfg) {
|
2019-02-13 05:24:22 -08:00
|
|
|
err := sp.reload(cfg)
|
|
|
|
if err != nil {
|
|
|
|
level.Error(m.logger).Log("msg", "error reloading scrape pool", "err", err, "scrape_pool", name)
|
|
|
|
failed = true
|
|
|
|
}
|
2018-01-18 03:49:42 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-13 05:24:22 -08:00
|
|
|
if failed {
|
2019-03-25 16:01:12 -07:00
|
|
|
return errors.New("failed to apply the new configuration")
|
2019-02-13 05:24:22 -08:00
|
|
|
}
|
2017-11-25 05:13:54 -08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-04-09 07:18:25 -07:00
|
|
|
// TargetsAll returns active and dropped targets grouped by job_name.
|
|
|
|
func (m *Manager) TargetsAll() map[string][]*Target {
|
2018-09-26 02:20:56 -07:00
|
|
|
m.mtxScrape.Lock()
|
|
|
|
defer m.mtxScrape.Unlock()
|
2018-01-17 03:46:17 -08:00
|
|
|
|
2018-09-26 02:20:56 -07:00
|
|
|
targets := make(map[string][]*Target, len(m.scrapePools))
|
|
|
|
for tset, sp := range m.scrapePools {
|
|
|
|
targets[tset] = append(sp.ActiveTargets(), sp.DroppedTargets()...)
|
2017-11-25 05:13:54 -08:00
|
|
|
|
2018-09-26 02:20:56 -07:00
|
|
|
}
|
|
|
|
return targets
|
2018-04-09 07:18:25 -07:00
|
|
|
}
|
2018-01-17 03:46:17 -08:00
|
|
|
|
2018-09-26 02:20:56 -07:00
|
|
|
// TargetsActive returns the active targets currently being scraped.
|
|
|
|
func (m *Manager) TargetsActive() map[string][]*Target {
|
|
|
|
m.mtxScrape.Lock()
|
|
|
|
defer m.mtxScrape.Unlock()
|
2017-11-25 05:13:54 -08:00
|
|
|
|
2018-09-26 02:20:56 -07:00
|
|
|
targets := make(map[string][]*Target, len(m.scrapePools))
|
|
|
|
for tset, sp := range m.scrapePools {
|
|
|
|
targets[tset] = sp.ActiveTargets()
|
2018-02-21 09:26:18 -08:00
|
|
|
}
|
2018-09-26 02:20:56 -07:00
|
|
|
return targets
|
2018-02-21 09:26:18 -08:00
|
|
|
}
|
|
|
|
|
2018-09-26 02:20:56 -07:00
|
|
|
// TargetsDropped returns the dropped targets during relabelling.
|
|
|
|
func (m *Manager) TargetsDropped() map[string][]*Target {
|
2018-04-09 07:18:25 -07:00
|
|
|
m.mtxScrape.Lock()
|
|
|
|
defer m.mtxScrape.Unlock()
|
2017-11-25 05:13:54 -08:00
|
|
|
|
2018-09-26 02:20:56 -07:00
|
|
|
targets := make(map[string][]*Target, len(m.scrapePools))
|
|
|
|
for tset, sp := range m.scrapePools {
|
|
|
|
targets[tset] = sp.DroppedTargets()
|
2018-01-14 11:42:31 -08:00
|
|
|
}
|
2018-09-26 02:20:56 -07:00
|
|
|
return targets
|
2017-11-25 05:13:54 -08:00
|
|
|
}
|
2019-03-12 03:46:15 -07:00
|
|
|
|
2019-03-15 05:02:16 -07:00
|
|
|
// getFqdn returns a FQDN if it's possible, otherwise falls back to hostname.
//
// The system hostname is resolved to its IP addresses and each address is
// reverse-resolved in turn; the first non-empty name found is returned. If the
// forward lookup fails, or no address yields a name, the plain hostname is
// returned instead. An error is returned only when the hostname itself cannot
// be determined.
func getFqdn() (string, error) {
	hostname, err := os.Hostname()
	if err != nil {
		return "", err
	}

	ips, err := net.LookupIP(hostname)
	if err != nil {
		// Return the system hostname if we can't look up the IP address.
		return hostname, nil
	}

	// lookup reverse-resolves a single address and returns the first name
	// reported for it, or "" when the address cannot be rendered as text, the
	// lookup fails, or the lookup succeeds without returning any name.
	lookup := func(addr encoding.TextMarshaler) string {
		text, err := addr.MarshalText()
		if err != nil {
			return ""
		}
		hosts, err := net.LookupAddr(string(text))
		if err != nil || len(hosts) == 0 {
			return ""
		}
		return hosts[0]
	}

	// An IPv4 address has the same textual form in its 4-byte and 16-byte
	// representations, so one reverse lookup per address is sufficient.
	for _, ip := range ips {
		if fqdn := lookup(ip); fqdn != "" {
			return fqdn, nil
		}
	}
	return hostname, nil
}
|