2017-11-25 05:13:54 -08:00
// Copyright 2013 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2018-02-01 01:55:07 -08:00
package scrape
2017-11-25 05:13:54 -08:00
import (
2023-11-02 13:45:07 -07:00
"errors"
2019-02-13 05:24:22 -08:00
"fmt"
2019-03-12 03:46:15 -07:00
"hash/fnv"
2018-01-18 03:49:42 -08:00
"reflect"
2018-01-17 03:46:17 -08:00
"sync"
2018-09-26 02:20:56 -07:00
"time"
2017-11-25 05:13:54 -08:00
2021-06-11 09:17:59 -07:00
"github.com/go-kit/log"
"github.com/go-kit/log/level"
2020-01-29 03:13:18 -08:00
"github.com/prometheus/client_golang/prometheus"
2022-03-08 15:48:47 -08:00
config_util "github.com/prometheus/common/config"
2022-05-05 15:42:04 -07:00
"github.com/prometheus/common/model"
2020-10-22 02:00:08 -07:00
2017-11-25 05:13:54 -08:00
"github.com/prometheus/prometheus/config"
Refactor SD configuration to remove `config` dependency (#3629)
* refactor: move targetGroup struct and CheckOverflow() to their own package
* refactor: move auth and security related structs to a utility package, fix import error in utility package
* refactor: Azure SD, remove SD struct from config
* refactor: DNS SD, remove SD struct from config into dns package
* refactor: ec2 SD, move SD struct from config into the ec2 package
* refactor: file SD, move SD struct from config to file discovery package
* refactor: gce, move SD struct from config to gce discovery package
* refactor: move HTTPClientConfig and URL into util/config, fix import error in httputil
* refactor: consul, move SD struct from config into consul discovery package
* refactor: marathon, move SD struct from config into marathon discovery package
* refactor: triton, move SD struct from config to triton discovery package, fix test
* refactor: zookeeper, move SD structs from config to zookeeper discovery package
* refactor: openstack, remove SD struct from config, move into openstack discovery package
* refactor: kubernetes, move SD struct from config into kubernetes discovery package
* refactor: notifier, use targetgroup package instead of config
* refactor: tests for file, marathon, triton SD - use targetgroup package instead of config.TargetGroup
* refactor: retrieval, use targetgroup package instead of config.TargetGroup
* refactor: storage, use config util package
* refactor: discovery manager, use targetgroup package instead of config.TargetGroup
* refactor: use HTTPClient and TLS config from configUtil instead of config
* refactor: tests, use targetgroup package instead of config.TargetGroup
* refactor: fix targetgroup.Group pointers that were removed by mistake
* refactor: openstack, kubernetes: drop prefixes
* refactor: remove import aliases forced due to vscode bug
* refactor: move main SD struct out of config into discovery/config
* refactor: rename configUtil to config_util
* refactor: rename yamlUtil to yaml_config
* refactor: kubernetes, remove prefixes
* refactor: move the TargetGroup package to discovery/
* refactor: fix order of imports
2017-12-29 12:01:34 -08:00
"github.com/prometheus/prometheus/discovery/targetgroup"
2021-11-08 06:23:17 -08:00
"github.com/prometheus/prometheus/model/labels"
2017-11-25 05:13:54 -08:00
"github.com/prometheus/prometheus/storage"
2021-07-07 13:11:49 -07:00
"github.com/prometheus/prometheus/util/osutil"
2023-10-17 02:27:46 -07:00
"github.com/prometheus/prometheus/util/pool"
2017-11-25 05:13:54 -08:00
)
2023-10-03 13:09:25 -07:00
// NewManager is the Manager constructor.
2023-09-22 09:47:44 -07:00
func NewManager ( o * Options , logger log . Logger , app storage . Appendable , registerer prometheus . Registerer ) ( * Manager , error ) {
2021-08-24 05:31:14 -07:00
if o == nil {
o = & Options { }
}
2018-09-26 02:20:56 -07:00
if logger == nil {
logger = log . NewNopLogger ( )
}
2023-09-22 09:47:44 -07:00
sm , err := newScrapeMetrics ( registerer )
if err != nil {
return nil , fmt . Errorf ( "failed to create scrape manager due to error: %w" , err )
}
2020-01-29 03:13:18 -08:00
m := & Manager {
2017-11-25 05:13:54 -08:00
append : app ,
2021-08-24 05:31:14 -07:00
opts : o ,
2017-11-25 05:13:54 -08:00
logger : logger ,
scrapeConfigs : make ( map [ string ] * config . ScrapeConfig ) ,
scrapePools : make ( map [ string ] * scrapePool ) ,
2017-11-26 07:15:15 -08:00
graceShut : make ( chan struct { } ) ,
2018-09-26 02:20:56 -07:00
triggerReload : make ( chan struct { } , 1 ) ,
2023-09-22 09:47:44 -07:00
metrics : sm ,
2023-10-17 02:27:46 -07:00
buffers : pool . New ( 1e3 , 100e6 , 3 , func ( sz int ) interface { } { return make ( [ ] byte , 0 , sz ) } ) ,
2017-11-25 05:13:54 -08:00
}
2020-01-29 03:13:18 -08:00
2023-09-22 09:47:44 -07:00
m . metrics . setTargetMetadataCacheGatherer ( m )
return m , nil
2017-11-25 05:13:54 -08:00
}
2021-08-24 05:31:14 -07:00
// Options are the configuration parameters to the scrape manager.
type Options struct {
2022-07-20 04:35:47 -07:00
ExtraMetrics bool
NoDefaultPort bool
2022-05-03 11:45:52 -07:00
// Option used by downstream scraper users like OpenTelemetry Collector
// to help lookup metric metadata. Should be false for Prometheus.
PassMetadataInContext bool
2022-08-31 06:50:05 -07:00
// Option to enable the experimental in-memory metadata storage and append
// metadata to the WAL.
EnableMetadataStorage bool
2022-05-05 15:42:04 -07:00
// Option to increase the interval used by scrape manager to throttle target groups updates.
DiscoveryReloadInterval model . Duration
2023-12-11 00:43:42 -08:00
// Option to enable the ingestion of the created timestamp as a synthetic zero sample.
// See: https://github.com/prometheus/proposals/blob/main/proposals/2023-06-13_created-timestamp.md
EnableCreatedTimestampZeroIngestion bool
2024-01-09 12:17:55 -08:00
// if UTF8 is not allowed, use this method
NameEscapingScheme string
2022-03-08 15:48:47 -08:00
2022-03-24 15:16:59 -07:00
// Optional HTTP client options to use when scraping.
HTTPClientOptions [ ] config_util . HTTPClientOption
2023-12-11 00:43:42 -08:00
// private option for testability.
skipOffsetting bool
2021-08-24 05:31:14 -07:00
}
2024-01-10 08:24:05 -08:00
// DefaultNameEscapingScheme is the name escaping scheme meant to be used when
// none is configured explicitly.
// NOTE(review): no live code in the visible part of this file reads this
// constant — confirm where the default is actually applied.
const DefaultNameEscapingScheme = model . ValueEncodingEscaping
2018-02-01 02:06:24 -08:00
// Manager maintains a set of scrape pools and manages start/stop cycles
2021-07-19 03:25:13 -07:00
// when receiving new target groups from the discovery manager.
2018-02-01 02:06:24 -08:00
type Manager struct {
2021-08-24 05:31:14 -07:00
opts * Options
2018-04-09 07:18:25 -07:00
logger log . Logger
2020-02-06 07:58:38 -08:00
append storage . Appendable
2018-04-09 07:18:25 -07:00
graceShut chan struct { }
2023-05-25 02:49:43 -07:00
offsetSeed uint64 // Global offsetSeed seed is used to spread scrape workload across HA setup.
2018-04-09 07:18:25 -07:00
mtxScrape sync . Mutex // Guards the fields below.
2017-11-25 05:13:54 -08:00
scrapeConfigs map [ string ] * config . ScrapeConfig
scrapePools map [ string ] * scrapePool
2018-09-26 02:20:56 -07:00
targetSets map [ string ] [ ] * targetgroup . Group
2023-10-17 02:27:46 -07:00
buffers * pool . Pool
2018-09-26 02:20:56 -07:00
triggerReload chan struct { }
2023-09-22 09:47:44 -07:00
metrics * scrapeMetrics
2017-11-25 05:13:54 -08:00
}
2018-09-26 02:20:56 -07:00
// Run receives and saves target set updates and triggers the scraping loops reloading.
// Reloading happens in the background so that it doesn't block receiving targets updates.
2018-02-01 02:06:24 -08:00
func ( m * Manager ) Run ( tsets <- chan map [ string ] [ ] * targetgroup . Group ) error {
2018-09-26 02:20:56 -07:00
go m . reloader ( )
2017-11-25 05:13:54 -08:00
for {
select {
case ts := <- tsets :
2018-09-26 02:20:56 -07:00
m . updateTsets ( ts )
select {
case m . triggerReload <- struct { } { } :
default :
}
2017-11-26 07:15:15 -08:00
case <- m . graceShut :
return nil
2017-11-25 05:13:54 -08:00
}
}
}
2018-09-26 02:20:56 -07:00
func ( m * Manager ) reloader ( ) {
2022-05-05 15:42:04 -07:00
reloadIntervalDuration := m . opts . DiscoveryReloadInterval
if reloadIntervalDuration < model . Duration ( 5 * time . Second ) {
reloadIntervalDuration = model . Duration ( 5 * time . Second )
}
ticker := time . NewTicker ( time . Duration ( reloadIntervalDuration ) )
2018-09-26 02:20:56 -07:00
defer ticker . Stop ( )
for {
select {
case <- m . graceShut :
return
case <- ticker . C :
select {
case <- m . triggerReload :
m . reload ( )
case <- m . graceShut :
return
}
}
}
}
func ( m * Manager ) reload ( ) {
m . mtxScrape . Lock ( )
2024-01-09 12:17:55 -08:00
defer m . mtxScrape . Unlock ( )
2024-01-18 08:35:50 -08:00
// var err error
// if m.opts.UTF8Names {
// model.NameValidationScheme = model.UTF8Validation
// } else {
// model.NameValidationScheme = model.LegacyValidation
// }
// level.Info(m.logger).Log("msg", "validation scheme", "scheme", model.NameValidationScheme, "arg", m.opts.UTF8Names)
// XXXXX the problem with this is that agent does not really use scrape.Options. Also too, this is like per-scrape not per-instance, so it's not really the right place for this at all.
// if m.opts.NameEscapingScheme != "" {
// model.NameEscapingScheme, err = model.ToEscapingScheme(m.opts.NameEscapingScheme)
// if err != nil {
// level.Error(m.logger).Log("msg", "error setting escaping scheme", "err", err)
// return
// }
// } else {
// model.NameEscapingScheme = DefaultNameEscapingScheme
// }
level . Info ( m . logger ) . Log ( "msg" , "ESCAPING SCHEME" , "scheme" , model . NameEscapingScheme . String ( ) )
2024-01-09 12:17:55 -08:00
2018-09-26 02:20:56 -07:00
var wg sync . WaitGroup
for setName , groups := range m . targetSets {
2019-02-13 05:24:22 -08:00
if _ , ok := m . scrapePools [ setName ] ; ! ok {
2018-09-26 02:20:56 -07:00
scrapeConfig , ok := m . scrapeConfigs [ setName ]
if ! ok {
level . Error ( m . logger ) . Log ( "msg" , "error reloading target set" , "err" , "invalid config id:" + setName )
2018-11-23 01:23:55 -08:00
continue
2018-09-26 02:20:56 -07:00
}
2023-09-22 09:47:44 -07:00
m . metrics . targetScrapePools . Inc ( )
2023-10-17 02:27:46 -07:00
sp , err := newScrapePool ( scrapeConfig , m . append , m . offsetSeed , log . With ( m . logger , "scrape_pool" , setName ) , m . buffers , m . opts , m . metrics )
2019-02-13 05:24:22 -08:00
if err != nil {
2023-09-22 09:47:44 -07:00
m . metrics . targetScrapePoolsFailed . Inc ( )
2019-02-13 05:24:22 -08:00
level . Error ( m . logger ) . Log ( "msg" , "error creating new scrape pool" , "err" , err , "scrape_pool" , setName )
continue
}
2018-09-26 02:20:56 -07:00
m . scrapePools [ setName ] = sp
}
wg . Add ( 1 )
// Run the sync in parallel as these take a while and at high load can't catch up.
go func ( sp * scrapePool , groups [ ] * targetgroup . Group ) {
sp . Sync ( groups )
wg . Done ( )
2019-02-13 05:24:22 -08:00
} ( m . scrapePools [ setName ] , groups )
2018-09-26 02:20:56 -07:00
}
wg . Wait ( )
}
2023-05-25 02:49:43 -07:00
// setOffsetSeed calculates a global offsetSeed per server relying on extra label set.
func ( m * Manager ) setOffsetSeed ( labels labels . Labels ) error {
2019-03-12 03:46:15 -07:00
h := fnv . New64a ( )
2021-07-07 13:11:49 -07:00
hostname , err := osutil . GetFQDN ( )
2019-03-12 03:46:15 -07:00
if err != nil {
return err
}
if _ , err := fmt . Fprintf ( h , "%s%s" , hostname , labels . String ( ) ) ; err != nil {
return err
}
2023-05-25 02:49:43 -07:00
m . offsetSeed = h . Sum64 ( )
2019-03-12 03:46:15 -07:00
return nil
}
2017-11-26 07:15:15 -08:00
// Stop cancels all running scrape pools and blocks until all have exited.
2018-02-01 02:06:24 -08:00
func ( m * Manager ) Stop ( ) {
2018-04-09 07:18:25 -07:00
m . mtxScrape . Lock ( )
defer m . mtxScrape . Unlock ( )
2017-11-26 07:15:15 -08:00
for _ , sp := range m . scrapePools {
sp . stop ( )
}
close ( m . graceShut )
}
2018-09-26 02:20:56 -07:00
// updateTsets replaces the stored target sets with tsets while holding the
// scrape mutex.
func (m *Manager) updateTsets(tsets map[string][]*targetgroup.Group) {
	m.mtxScrape.Lock()
	defer m.mtxScrape.Unlock()

	m.targetSets = tsets
}
2017-11-25 05:13:54 -08:00
// ApplyConfig resets the manager's target providers and job configurations as defined by the new cfg.
2018-02-01 02:06:24 -08:00
func ( m * Manager ) ApplyConfig ( cfg * config . Config ) error {
2018-04-09 07:18:25 -07:00
m . mtxScrape . Lock ( )
defer m . mtxScrape . Unlock ( )
2023-02-24 02:47:12 -08:00
scfgs , err := cfg . GetScrapeConfigs ( )
if err != nil {
return err
}
2018-01-17 03:46:17 -08:00
c := make ( map [ string ] * config . ScrapeConfig )
2023-02-24 02:47:12 -08:00
for _ , scfg := range scfgs {
2018-01-17 03:46:17 -08:00
c [ scfg . JobName ] = scfg
2017-11-25 05:13:54 -08:00
}
2018-01-17 03:46:17 -08:00
m . scrapeConfigs = c
2018-01-18 03:49:42 -08:00
2023-05-25 02:49:43 -07:00
if err := m . setOffsetSeed ( cfg . GlobalConfig . ExternalLabels ) ; err != nil {
2019-03-12 03:46:15 -07:00
return err
}
2019-02-13 05:24:22 -08:00
// Cleanup and reload pool if the configuration has changed.
var failed bool
2018-01-18 03:49:42 -08:00
for name , sp := range m . scrapePools {
style: Replace `else if` cascades with `switch`
Wiser coders than myself have come to the conclusion that a `switch`
statement is almost always superior to a statement that includes any
`else if`.
The exceptions that I have found in our codebase are just these two:
* The `if else` is followed by an additional statement before the next
condition (separated by a `;`).
* The whole thing is within a `for` loop and `break` statements are
used. In this case, using `switch` would require tagging the `for`
loop, which probably tips the balance.
Why are `switch` statements more readable?
For one, fewer curly braces. But more importantly, the conditions all
have the same alignment, so the whole thing follows the natural flow
of going down a list of conditions. With `else if`, in contrast, all
conditions but the first are "hidden" behind `} else if `, harder to
spot and (for no good reason) presented differently from the first
condition.
I'm sure the aforemention wise coders can list even more reasons.
In any case, I like it so much that I have found myself recommending
it in code reviews. I would like to make it a habit in our code base,
without making it a hard requirement that we would test on the CI. But
for that, there has to be a role model, so this commit eliminates all
`if else` occurrences, unless it is autogenerated code or fits one of
the exceptions above.
Signed-off-by: beorn7 <beorn@grafana.com>
2023-04-12 07:14:31 -07:00
switch cfg , ok := m . scrapeConfigs [ name ] ; {
case ! ok :
2018-01-18 03:49:42 -08:00
sp . stop ( )
delete ( m . scrapePools , name )
style: Replace `else if` cascades with `switch`
Wiser coders than myself have come to the conclusion that a `switch`
statement is almost always superior to a statement that includes any
`else if`.
The exceptions that I have found in our codebase are just these two:
* The `if else` is followed by an additional statement before the next
condition (separated by a `;`).
* The whole thing is within a `for` loop and `break` statements are
used. In this case, using `switch` would require tagging the `for`
loop, which probably tips the balance.
Why are `switch` statements more readable?
For one, fewer curly braces. But more importantly, the conditions all
have the same alignment, so the whole thing follows the natural flow
of going down a list of conditions. With `else if`, in contrast, all
conditions but the first are "hidden" behind `} else if `, harder to
spot and (for no good reason) presented differently from the first
condition.
I'm sure the aforemention wise coders can list even more reasons.
In any case, I like it so much that I have found myself recommending
it in code reviews. I would like to make it a habit in our code base,
without making it a hard requirement that we would test on the CI. But
for that, there has to be a role model, so this commit eliminates all
`if else` occurrences, unless it is autogenerated code or fits one of
the exceptions above.
Signed-off-by: beorn7 <beorn@grafana.com>
2023-04-12 07:14:31 -07:00
case ! reflect . DeepEqual ( sp . config , cfg ) :
2019-02-13 05:24:22 -08:00
err := sp . reload ( cfg )
if err != nil {
level . Error ( m . logger ) . Log ( "msg" , "error reloading scrape pool" , "err" , err , "scrape_pool" , name )
failed = true
}
2018-01-18 03:49:42 -08:00
}
}
2019-02-13 05:24:22 -08:00
if failed {
2019-03-25 16:01:12 -07:00
return errors . New ( "failed to apply the new configuration" )
2019-02-13 05:24:22 -08:00
}
2017-11-25 05:13:54 -08:00
return nil
}
2018-04-09 07:18:25 -07:00
// TargetsAll returns active and dropped targets grouped by job_name.
func ( m * Manager ) TargetsAll ( ) map [ string ] [ ] * Target {
2018-09-26 02:20:56 -07:00
m . mtxScrape . Lock ( )
defer m . mtxScrape . Unlock ( )
2018-01-17 03:46:17 -08:00
2018-09-26 02:20:56 -07:00
targets := make ( map [ string ] [ ] * Target , len ( m . scrapePools ) )
for tset , sp := range m . scrapePools {
targets [ tset ] = append ( sp . ActiveTargets ( ) , sp . DroppedTargets ( ) ... )
}
return targets
2018-04-09 07:18:25 -07:00
}
2018-01-17 03:46:17 -08:00
2022-12-23 02:55:08 -08:00
// ScrapePools returns the names of all scrape pools currently held by the
// manager. The order follows map iteration and is therefore unspecified.
func (m *Manager) ScrapePools() []string {
	m.mtxScrape.Lock()
	defer m.mtxScrape.Unlock()

	names := make([]string, len(m.scrapePools))
	i := 0
	for name := range m.scrapePools {
		names[i] = name
		i++
	}
	return names
}
2018-09-26 02:20:56 -07:00
// TargetsActive returns the active targets currently being scraped.
func ( m * Manager ) TargetsActive ( ) map [ string ] [ ] * Target {
m . mtxScrape . Lock ( )
defer m . mtxScrape . Unlock ( )
2017-11-25 05:13:54 -08:00
2018-09-26 02:20:56 -07:00
targets := make ( map [ string ] [ ] * Target , len ( m . scrapePools ) )
for tset , sp := range m . scrapePools {
2023-11-20 11:28:05 -08:00
targets [ tset ] = sp . ActiveTargets ( )
2018-02-21 09:26:18 -08:00
}
2018-09-26 02:20:56 -07:00
return targets
2018-02-21 09:26:18 -08:00
}
2023-08-14 07:39:25 -07:00
// TargetsDropped returns the dropped targets during relabelling, subject to KeepDroppedTargets limit.
2018-09-26 02:20:56 -07:00
func ( m * Manager ) TargetsDropped ( ) map [ string ] [ ] * Target {
2018-04-09 07:18:25 -07:00
m . mtxScrape . Lock ( )
defer m . mtxScrape . Unlock ( )
2017-11-25 05:13:54 -08:00
2018-09-26 02:20:56 -07:00
targets := make ( map [ string ] [ ] * Target , len ( m . scrapePools ) )
for tset , sp := range m . scrapePools {
targets [ tset ] = sp . DroppedTargets ( )
2018-01-14 11:42:31 -08:00
}
2018-09-26 02:20:56 -07:00
return targets
2017-11-25 05:13:54 -08:00
}
2023-08-14 07:39:25 -07:00
// TargetsDroppedCounts returns each scrape pool's dropped-targets counter,
// keyed by scrape pool name.
func (m *Manager) TargetsDroppedCounts() map[string]int {
	m.mtxScrape.Lock()
	defer m.mtxScrape.Unlock()

	perPool := make(map[string]int, len(m.scrapePools))
	for job := range m.scrapePools {
		perPool[job] = m.scrapePools[job].droppedTargetsCount
	}
	return perPool
}