2016-02-17 10:33:17 -08:00
|
|
|
// Copyright 2016 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2018-02-01 01:55:07 -08:00
|
|
|
package scrape
|
2016-02-17 10:33:17 -08:00
|
|
|
|
2016-02-22 07:46:55 -08:00
|
|
|
import (
|
2017-02-22 04:00:51 -08:00
|
|
|
"bufio"
|
2017-01-15 08:33:07 -08:00
|
|
|
"bytes"
|
2017-02-22 04:00:51 -08:00
|
|
|
"compress/gzip"
|
2017-10-24 21:21:42 -07:00
|
|
|
"context"
|
2016-02-28 10:21:50 -08:00
|
|
|
"fmt"
|
|
|
|
"io"
|
2019-04-18 01:50:37 -07:00
|
|
|
"io/ioutil"
|
2017-04-13 10:07:23 -07:00
|
|
|
"math"
|
2016-02-28 10:21:50 -08:00
|
|
|
"net/http"
|
2020-01-22 04:13:47 -08:00
|
|
|
"reflect"
|
2021-06-18 00:38:12 -07:00
|
|
|
"strconv"
|
2016-02-22 07:46:55 -08:00
|
|
|
"sync"
|
|
|
|
"time"
|
2017-02-22 04:00:51 -08:00
|
|
|
"unsafe"
|
2016-02-17 10:33:17 -08:00
|
|
|
|
2021-06-11 09:17:59 -07:00
|
|
|
"github.com/go-kit/log"
|
|
|
|
"github.com/go-kit/log/level"
|
2019-02-13 05:24:22 -08:00
|
|
|
"github.com/pkg/errors"
|
2016-02-22 07:46:55 -08:00
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
2018-04-25 10:19:06 -07:00
|
|
|
config_util "github.com/prometheus/common/config"
|
2017-09-08 05:34:45 -07:00
|
|
|
"github.com/prometheus/common/model"
|
2017-02-28 05:59:33 -08:00
|
|
|
"github.com/prometheus/common/version"
|
2016-02-17 10:33:17 -08:00
|
|
|
|
2016-02-23 02:56:09 -08:00
|
|
|
"github.com/prometheus/prometheus/config"
|
Refactor SD configuration to remove `config` dependency (#3629)
* refactor: move targetGroup struct and CheckOverflow() to their own package
* refactor: move auth and security related structs to a utility package, fix import error in utility package
* refactor: Azure SD, remove SD struct from config
* refactor: DNS SD, remove SD struct from config into dns package
* refactor: ec2 SD, move SD struct from config into the ec2 package
* refactor: file SD, move SD struct from config to file discovery package
* refactor: gce, move SD struct from config to gce discovery package
* refactor: move HTTPClientConfig and URL into util/config, fix import error in httputil
* refactor: consul, move SD struct from config into consul discovery package
* refactor: marathon, move SD struct from config into marathon discovery package
* refactor: triton, move SD struct from config to triton discovery package, fix test
* refactor: zookeeper, move SD structs from config to zookeeper discovery package
* refactor: openstack, remove SD struct from config, move into openstack discovery package
* refactor: kubernetes, move SD struct from config into kubernetes discovery package
* refactor: notifier, use targetgroup package instead of config
* refactor: tests for file, marathon, triton SD - use targetgroup package instead of config.TargetGroup
* refactor: retrieval, use targetgroup package instead of config.TargetGroup
* refactor: storage, use config util package
* refactor: discovery manager, use targetgroup package instead of config.TargetGroup
* refactor: use HTTPClient and TLS config from configUtil instead of config
* refactor: tests, use targetgroup package instead of config.TargetGroup
* refactor: fix tagetgroup.Group pointers that were removed by mistake
* refactor: openstack, kubernetes: drop prefixes
* refactor: remove import aliases forced due to vscode bug
* refactor: move main SD struct out of config into discovery/config
* refactor: rename configUtil to config_util
* refactor: rename yamlUtil to yaml_config
* refactor: kubernetes, remove prefixes
* refactor: move the TargetGroup package to discovery/
* refactor: fix order of imports
2017-12-29 12:01:34 -08:00
|
|
|
"github.com/prometheus/prometheus/discovery/targetgroup"
|
2021-03-16 02:47:45 -07:00
|
|
|
"github.com/prometheus/prometheus/pkg/exemplar"
|
2016-12-29 00:27:30 -08:00
|
|
|
"github.com/prometheus/prometheus/pkg/labels"
|
2017-09-07 05:43:21 -07:00
|
|
|
"github.com/prometheus/prometheus/pkg/pool"
|
2017-09-08 05:34:45 -07:00
|
|
|
"github.com/prometheus/prometheus/pkg/relabel"
|
2017-01-15 08:33:07 -08:00
|
|
|
"github.com/prometheus/prometheus/pkg/textparse"
|
|
|
|
"github.com/prometheus/prometheus/pkg/timestamp"
|
2017-04-13 10:07:23 -07:00
|
|
|
"github.com/prometheus/prometheus/pkg/value"
|
2016-02-22 07:46:55 -08:00
|
|
|
"github.com/prometheus/prometheus/storage"
|
|
|
|
)
|
2016-02-17 10:33:17 -08:00
|
|
|
|
2020-09-25 06:44:47 -07:00
|
|
|
// scrapeTimestampTolerance is a temporary tolerance used when aligning the
// timestamps of scrape appends, to enable better compression at the TSDB
// level.
// See https://github.com/prometheus/prometheus/issues/7846
const scrapeTimestampTolerance = time.Millisecond * 2

// AlignScrapeTimestamps enables the timestamp alignment tolerance defined by
// scrapeTimestampTolerance.
var AlignScrapeTimestamps = true
|
2020-09-25 06:44:47 -07:00
|
|
|
|
2020-03-01 23:18:05 -08:00
|
|
|
// errNameLabelMandatory is the error value for a label set that is missing the
// metric name label (labels.MetricName).
var errNameLabelMandatory = fmt.Errorf("missing metric name (%s label)", labels.MetricName)
|
|
|
|
|
2016-02-22 07:46:55 -08:00
|
|
|
var (
|
|
|
|
targetIntervalLength = prometheus.NewSummaryVec(
|
|
|
|
prometheus.SummaryOpts{
|
2016-11-23 00:17:04 -08:00
|
|
|
Name: "prometheus_target_interval_length_seconds",
|
2016-02-22 07:46:55 -08:00
|
|
|
Help: "Actual intervals between scrapes.",
|
|
|
|
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
|
|
|
|
},
|
2016-11-23 00:17:04 -08:00
|
|
|
[]string{"interval"},
|
2016-02-22 07:46:55 -08:00
|
|
|
)
|
2016-03-09 07:33:10 -08:00
|
|
|
targetReloadIntervalLength = prometheus.NewSummaryVec(
|
|
|
|
prometheus.SummaryOpts{
|
2016-11-23 00:17:04 -08:00
|
|
|
Name: "prometheus_target_reload_length_seconds",
|
2016-03-09 07:33:10 -08:00
|
|
|
Help: "Actual interval to reload the scrape pool with a given configuration.",
|
|
|
|
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
|
|
|
|
},
|
2016-11-23 00:17:04 -08:00
|
|
|
[]string{"interval"},
|
2016-03-09 07:33:10 -08:00
|
|
|
)
|
2019-02-13 05:24:22 -08:00
|
|
|
targetScrapePools = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrape_pools_total",
|
2019-07-29 09:08:54 -07:00
|
|
|
Help: "Total number of scrape pool creation attempts.",
|
2019-02-13 05:24:22 -08:00
|
|
|
},
|
|
|
|
)
|
|
|
|
targetScrapePoolsFailed = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrape_pools_failed_total",
|
|
|
|
Help: "Total number of scrape pool creations that failed.",
|
|
|
|
},
|
|
|
|
)
|
|
|
|
targetScrapePoolReloads = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrape_pool_reloads_total",
|
2020-07-30 05:20:24 -07:00
|
|
|
Help: "Total number of scrape pool reloads.",
|
2019-02-13 05:24:22 -08:00
|
|
|
},
|
|
|
|
)
|
|
|
|
targetScrapePoolReloadsFailed = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrape_pool_reloads_failed_total",
|
2020-07-30 05:20:24 -07:00
|
|
|
Help: "Total number of failed scrape pool reloads.",
|
2019-02-13 05:24:22 -08:00
|
|
|
},
|
|
|
|
)
|
2020-07-30 05:20:24 -07:00
|
|
|
targetScrapePoolExceededTargetLimit = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrape_pool_exceeded_target_limit_total",
|
|
|
|
Help: "Total number of times scrape pools hit the target limit, during sync or config reload.",
|
|
|
|
},
|
|
|
|
)
|
|
|
|
targetScrapePoolTargetLimit = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Name: "prometheus_target_scrape_pool_target_limit",
|
|
|
|
Help: "Maximum number of targets allowed in this scrape pool.",
|
|
|
|
},
|
|
|
|
[]string{"scrape_job"},
|
|
|
|
)
|
|
|
|
targetScrapePoolTargetsAdded = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Name: "prometheus_target_scrape_pool_targets",
|
|
|
|
Help: "Current number of targets in this scrape pool.",
|
|
|
|
},
|
|
|
|
[]string{"scrape_job"},
|
|
|
|
)
|
2016-03-11 03:22:23 -08:00
|
|
|
targetSyncIntervalLength = prometheus.NewSummaryVec(
|
|
|
|
prometheus.SummaryOpts{
|
2016-11-23 00:17:04 -08:00
|
|
|
Name: "prometheus_target_sync_length_seconds",
|
2016-03-11 03:22:23 -08:00
|
|
|
Help: "Actual interval to sync the scrape pool.",
|
|
|
|
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
|
|
|
|
},
|
2016-11-23 00:17:04 -08:00
|
|
|
[]string{"scrape_job"},
|
2016-03-11 03:22:23 -08:00
|
|
|
)
|
|
|
|
targetScrapePoolSyncsCounter = prometheus.NewCounterVec(
|
|
|
|
prometheus.CounterOpts{
|
2016-11-23 00:17:04 -08:00
|
|
|
Name: "prometheus_target_scrape_pool_sync_total",
|
|
|
|
Help: "Total number of syncs that were executed on a scrape pool.",
|
2016-03-11 03:22:23 -08:00
|
|
|
},
|
2016-11-23 00:17:04 -08:00
|
|
|
[]string{"scrape_job"},
|
2016-03-11 03:22:23 -08:00
|
|
|
)
|
2021-05-15 19:19:22 -07:00
|
|
|
targetScrapeExceededBodySizeLimit = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrapes_exceeded_body_size_limit_total",
|
|
|
|
Help: "Total number of scrapes that hit the body size limit",
|
|
|
|
},
|
|
|
|
)
|
2016-12-16 07:08:50 -08:00
|
|
|
targetScrapeSampleLimit = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrapes_exceeded_sample_limit_total",
|
|
|
|
Help: "Total number of scrapes that hit the sample limit and were rejected.",
|
|
|
|
},
|
|
|
|
)
|
2017-08-02 05:10:18 -07:00
|
|
|
targetScrapeSampleDuplicate = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrapes_sample_duplicate_timestamp_total",
|
2021-03-16 02:47:45 -07:00
|
|
|
Help: "Total number of samples rejected due to duplicate timestamps but different values.",
|
2017-08-02 05:10:18 -07:00
|
|
|
},
|
|
|
|
)
|
|
|
|
targetScrapeSampleOutOfOrder = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrapes_sample_out_of_order_total",
|
2021-03-16 02:47:45 -07:00
|
|
|
Help: "Total number of samples rejected due to not being out of the expected order.",
|
2017-08-02 05:10:18 -07:00
|
|
|
},
|
|
|
|
)
|
|
|
|
targetScrapeSampleOutOfBounds = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrapes_sample_out_of_bounds_total",
|
2021-03-16 02:47:45 -07:00
|
|
|
Help: "Total number of samples rejected due to timestamp falling outside of the time bounds.",
|
2017-08-02 05:10:18 -07:00
|
|
|
},
|
|
|
|
)
|
2019-03-28 10:52:46 -07:00
|
|
|
targetScrapeCacheFlushForced = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrapes_cache_flush_forced_total",
|
|
|
|
Help: "How many times a scrape cache was flushed due to getting big while scrapes are failing.",
|
|
|
|
},
|
|
|
|
)
|
2021-03-16 02:47:45 -07:00
|
|
|
targetScrapeExemplarOutOfOrder = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrapes_exemplar_out_of_order_total",
|
|
|
|
Help: "Total number of exemplar rejected due to not being out of the expected order.",
|
|
|
|
},
|
|
|
|
)
|
2021-05-06 01:56:21 -07:00
|
|
|
targetScrapePoolExceededLabelLimits = prometheus.NewCounter(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_scrape_pool_exceeded_label_limits_total",
|
|
|
|
Help: "Total number of times scrape pools hit the label limits, during sync or config reload.",
|
|
|
|
},
|
|
|
|
)
|
2021-05-28 14:50:59 -07:00
|
|
|
targetSyncFailed = prometheus.NewCounterVec(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Name: "prometheus_target_sync_failed_total",
|
|
|
|
Help: "Total number of target sync failures.",
|
|
|
|
},
|
|
|
|
[]string{"scrape_job"},
|
|
|
|
)
|
2016-02-22 07:46:55 -08:00
|
|
|
)
|
2016-02-17 10:33:17 -08:00
|
|
|
|
2016-02-22 07:46:55 -08:00
|
|
|
func init() {
|
2020-01-29 03:13:18 -08:00
|
|
|
prometheus.MustRegister(
|
|
|
|
targetIntervalLength,
|
|
|
|
targetReloadIntervalLength,
|
|
|
|
targetScrapePools,
|
|
|
|
targetScrapePoolsFailed,
|
|
|
|
targetScrapePoolReloads,
|
|
|
|
targetScrapePoolReloadsFailed,
|
|
|
|
targetSyncIntervalLength,
|
|
|
|
targetScrapePoolSyncsCounter,
|
2021-05-15 19:19:22 -07:00
|
|
|
targetScrapeExceededBodySizeLimit,
|
2020-01-29 03:13:18 -08:00
|
|
|
targetScrapeSampleLimit,
|
|
|
|
targetScrapeSampleDuplicate,
|
|
|
|
targetScrapeSampleOutOfOrder,
|
|
|
|
targetScrapeSampleOutOfBounds,
|
2020-07-30 05:20:24 -07:00
|
|
|
targetScrapePoolExceededTargetLimit,
|
|
|
|
targetScrapePoolTargetLimit,
|
|
|
|
targetScrapePoolTargetsAdded,
|
2020-01-29 03:13:18 -08:00
|
|
|
targetScrapeCacheFlushForced,
|
|
|
|
targetMetadataCache,
|
2021-03-16 02:47:45 -07:00
|
|
|
targetScrapeExemplarOutOfOrder,
|
2021-05-06 01:56:21 -07:00
|
|
|
targetScrapePoolExceededLabelLimits,
|
2021-05-28 14:50:59 -07:00
|
|
|
targetSyncFailed,
|
2020-01-29 03:13:18 -08:00
|
|
|
)
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
2016-02-17 10:33:17 -08:00
|
|
|
|
2016-02-22 07:46:55 -08:00
|
|
|
// scrapePool manages scrapes for sets of targets.
|
|
|
|
type scrapePool struct {
|
2020-02-06 07:58:38 -08:00
|
|
|
appendable storage.Appendable
|
2017-09-08 05:34:45 -07:00
|
|
|
logger log.Logger
|
2020-11-12 08:06:25 -08:00
|
|
|
cancel context.CancelFunc
|
2016-02-17 10:33:17 -08:00
|
|
|
|
2020-11-12 08:06:25 -08:00
|
|
|
// mtx must not be taken after targetMtx.
|
2021-07-27 03:48:55 -07:00
|
|
|
mtx sync.Mutex
|
|
|
|
config *config.ScrapeConfig
|
|
|
|
client *http.Client
|
|
|
|
loops map[uint64]loop
|
2020-10-26 07:46:20 -07:00
|
|
|
|
2020-11-12 08:06:25 -08:00
|
|
|
targetMtx sync.Mutex
|
|
|
|
// activeTargets and loops must always be synchronized to have the same
|
2016-02-28 10:56:18 -08:00
|
|
|
// set of hashes.
|
2018-09-26 02:20:56 -07:00
|
|
|
activeTargets map[uint64]*Target
|
2017-12-04 07:12:28 -08:00
|
|
|
droppedTargets []*Target
|
2016-02-28 00:51:02 -08:00
|
|
|
|
|
|
|
// Constructor for new scrape loops. This is settable for testing convenience.
|
2019-03-12 03:26:18 -07:00
|
|
|
newLoop func(scrapeLoopOptions) loop
|
|
|
|
}
|
|
|
|
|
2021-05-06 01:56:21 -07:00
|
|
|
// labelLimits bundles the per-series label limits of a scrape configuration.
// verifyLabelLimits treats a limit that is zero as disabled.
type labelLimits struct {
	// labelLimit is the maximum number of labels per series.
	labelLimit int
	// labelNameLengthLimit is the maximum length of a label name.
	labelNameLengthLimit int
	// labelValueLengthLimit is the maximum length of a label value.
	labelValueLengthLimit int
}
|
|
|
|
|
2019-03-12 03:26:18 -07:00
|
|
|
type scrapeLoopOptions struct {
|
2019-03-15 03:04:15 -07:00
|
|
|
target *Target
|
|
|
|
scraper scraper
|
2021-05-06 01:56:21 -07:00
|
|
|
sampleLimit int
|
|
|
|
labelLimits *labelLimits
|
2019-03-15 03:04:15 -07:00
|
|
|
honorLabels bool
|
|
|
|
honorTimestamps bool
|
2021-08-31 08:37:32 -07:00
|
|
|
interval time.Duration
|
|
|
|
timeout time.Duration
|
2019-03-15 03:04:15 -07:00
|
|
|
mrc []*relabel.Config
|
2020-01-22 04:13:47 -08:00
|
|
|
cache *scrapeCache
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
2016-02-17 10:33:17 -08:00
|
|
|
|
2017-09-08 05:34:45 -07:00
|
|
|
// maxAheadTime is a ten-minute bound.
// NOTE(review): presumably the maximum amount a sample timestamp may lie in
// the future before it is rejected; its use is not visible here, so confirm.
const maxAheadTime = 10 * time.Minute
|
|
|
|
|
|
|
|
// labelsMutator is a function that takes a label set and returns a label set,
// e.g. the relabeling closures passed to newScrapeLoop by newScrapePool.
type labelsMutator func(labels.Labels) labels.Labels
|
|
|
|
|
2020-02-06 07:58:38 -08:00
|
|
|
func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, jitterSeed uint64, logger log.Logger) (*scrapePool, error) {
|
2019-02-13 05:24:22 -08:00
|
|
|
targetScrapePools.Inc()
|
2017-09-15 10:45:27 -07:00
|
|
|
if logger == nil {
|
|
|
|
logger = log.NewNopLogger()
|
|
|
|
}
|
|
|
|
|
2021-04-21 03:19:16 -07:00
|
|
|
client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, cfg.JobName, config_util.WithHTTP2Disabled())
|
2016-02-28 10:21:50 -08:00
|
|
|
if err != nil {
|
2019-02-13 05:24:22 -08:00
|
|
|
targetScrapePoolsFailed.Inc()
|
|
|
|
return nil, errors.Wrap(err, "error creating HTTP client")
|
2016-02-28 10:21:50 -08:00
|
|
|
}
|
2017-05-26 01:44:48 -07:00
|
|
|
|
2018-03-14 19:15:35 -07:00
|
|
|
buffers := pool.New(1e3, 100e6, 3, func(sz int) interface{} { return make([]byte, 0, sz) })
|
2017-05-26 01:44:48 -07:00
|
|
|
|
2017-12-03 09:14:08 -08:00
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
2017-09-08 05:34:45 -07:00
|
|
|
sp := &scrapePool{
|
2018-09-26 02:20:56 -07:00
|
|
|
cancel: cancel,
|
|
|
|
appendable: app,
|
|
|
|
config: cfg,
|
|
|
|
client: client,
|
|
|
|
activeTargets: map[uint64]*Target{},
|
|
|
|
loops: map[uint64]loop{},
|
|
|
|
logger: logger,
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
2019-03-12 03:26:18 -07:00
|
|
|
sp.newLoop = func(opts scrapeLoopOptions) loop {
|
2018-05-18 00:32:11 -07:00
|
|
|
// Update the targets retrieval function for metadata to a new scrape cache.
|
2020-01-22 04:13:47 -08:00
|
|
|
cache := opts.cache
|
|
|
|
if cache == nil {
|
|
|
|
cache = newScrapeCache()
|
|
|
|
}
|
2019-12-04 07:18:27 -08:00
|
|
|
opts.target.SetMetadataStore(cache)
|
2018-05-18 00:32:11 -07:00
|
|
|
|
2017-11-26 07:15:15 -08:00
|
|
|
return newScrapeLoop(
|
2017-12-03 09:14:08 -08:00
|
|
|
ctx,
|
2019-03-12 03:26:18 -07:00
|
|
|
opts.scraper,
|
|
|
|
log.With(logger, "target", opts.target),
|
2017-09-08 05:34:45 -07:00
|
|
|
buffers,
|
2019-03-12 03:26:18 -07:00
|
|
|
func(l labels.Labels) labels.Labels {
|
|
|
|
return mutateSampleLabels(l, opts.target, opts.honorLabels, opts.mrc)
|
|
|
|
},
|
|
|
|
func(l labels.Labels) labels.Labels { return mutateReportSampleLabels(l, opts.target) },
|
2021-05-06 01:56:21 -07:00
|
|
|
func(ctx context.Context) storage.Appender { return appender(app.Appender(ctx), opts.sampleLimit) },
|
2018-05-18 00:32:11 -07:00
|
|
|
cache,
|
2019-03-12 03:46:15 -07:00
|
|
|
jitterSeed,
|
2019-03-15 03:04:15 -07:00
|
|
|
opts.honorTimestamps,
|
2021-05-06 01:56:21 -07:00
|
|
|
opts.labelLimits,
|
2021-08-31 08:37:32 -07:00
|
|
|
opts.interval,
|
|
|
|
opts.timeout,
|
2017-09-08 05:34:45 -07:00
|
|
|
)
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
2017-09-08 05:34:45 -07:00
|
|
|
|
2019-02-13 05:24:22 -08:00
|
|
|
return sp, nil
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
2016-02-17 10:33:17 -08:00
|
|
|
|
2018-09-26 02:20:56 -07:00
|
|
|
func (sp *scrapePool) ActiveTargets() []*Target {
|
2020-10-26 07:46:20 -07:00
|
|
|
sp.targetMtx.Lock()
|
|
|
|
defer sp.targetMtx.Unlock()
|
2018-09-26 02:20:56 -07:00
|
|
|
|
|
|
|
var tActive []*Target
|
|
|
|
for _, t := range sp.activeTargets {
|
|
|
|
tActive = append(tActive, t)
|
|
|
|
}
|
|
|
|
return tActive
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sp *scrapePool) DroppedTargets() []*Target {
|
2020-10-26 07:46:20 -07:00
|
|
|
sp.targetMtx.Lock()
|
|
|
|
defer sp.targetMtx.Unlock()
|
2018-09-26 02:20:56 -07:00
|
|
|
return sp.droppedTargets
|
|
|
|
}
|
|
|
|
|
2016-02-23 05:37:25 -08:00
|
|
|
// stop terminates all scrape loops and returns after they all terminated.
|
2016-02-22 07:46:55 -08:00
|
|
|
func (sp *scrapePool) stop() {
|
2020-11-12 08:06:25 -08:00
|
|
|
sp.mtx.Lock()
|
|
|
|
defer sp.mtx.Unlock()
|
2017-12-03 09:14:08 -08:00
|
|
|
sp.cancel()
|
2016-02-22 07:46:55 -08:00
|
|
|
var wg sync.WaitGroup
|
2016-02-17 10:33:17 -08:00
|
|
|
|
2020-10-26 07:46:20 -07:00
|
|
|
sp.targetMtx.Lock()
|
2016-02-17 10:33:17 -08:00
|
|
|
|
2016-02-28 00:51:02 -08:00
|
|
|
for fp, l := range sp.loops {
|
2016-02-23 05:37:25 -08:00
|
|
|
wg.Add(1)
|
2016-02-17 10:33:17 -08:00
|
|
|
|
2016-02-23 05:37:25 -08:00
|
|
|
go func(l loop) {
|
|
|
|
l.stop()
|
|
|
|
wg.Done()
|
|
|
|
}(l)
|
2016-02-28 00:51:02 -08:00
|
|
|
|
|
|
|
delete(sp.loops, fp)
|
2018-09-26 02:20:56 -07:00
|
|
|
delete(sp.activeTargets, fp)
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
2020-10-26 07:46:20 -07:00
|
|
|
|
|
|
|
sp.targetMtx.Unlock()
|
|
|
|
|
2016-02-22 07:46:55 -08:00
|
|
|
wg.Wait()
|
2019-04-10 05:20:00 -07:00
|
|
|
sp.client.CloseIdleConnections()
|
2020-07-30 05:20:24 -07:00
|
|
|
|
|
|
|
if sp.config != nil {
|
|
|
|
targetScrapePoolSyncsCounter.DeleteLabelValues(sp.config.JobName)
|
|
|
|
targetScrapePoolTargetLimit.DeleteLabelValues(sp.config.JobName)
|
|
|
|
targetScrapePoolTargetsAdded.DeleteLabelValues(sp.config.JobName)
|
|
|
|
targetSyncIntervalLength.DeleteLabelValues(sp.config.JobName)
|
2021-05-28 14:50:59 -07:00
|
|
|
targetSyncFailed.DeleteLabelValues(sp.config.JobName)
|
2020-07-30 05:20:24 -07:00
|
|
|
}
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
|
|
|
|
2016-02-23 05:37:25 -08:00
|
|
|
// reload the scrape pool with the given scrape configuration. The target state is preserved
|
|
|
|
// but all scrape loops are restarted with the new scrape configuration.
|
2017-05-10 08:59:02 -07:00
|
|
|
// This method returns after all scrape loops that were stopped have stopped scraping.
|
2019-02-13 05:24:22 -08:00
|
|
|
func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error {
|
2020-11-12 08:06:25 -08:00
|
|
|
sp.mtx.Lock()
|
|
|
|
defer sp.mtx.Unlock()
|
2019-02-13 05:24:22 -08:00
|
|
|
targetScrapePoolReloads.Inc()
|
2016-03-09 07:33:10 -08:00
|
|
|
start := time.Now()
|
2016-11-22 03:48:30 -08:00
|
|
|
|
2021-04-21 03:19:16 -07:00
|
|
|
client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, cfg.JobName, config_util.WithHTTP2Disabled())
|
2016-02-28 10:21:50 -08:00
|
|
|
if err != nil {
|
2019-02-13 05:24:22 -08:00
|
|
|
targetScrapePoolReloadsFailed.Inc()
|
|
|
|
return errors.Wrap(err, "error creating HTTP client")
|
2016-02-28 10:21:50 -08:00
|
|
|
}
|
2020-01-22 04:13:47 -08:00
|
|
|
|
|
|
|
reuseCache := reusableCache(sp.config, cfg)
|
2016-02-23 04:34:24 -08:00
|
|
|
sp.config = cfg
|
2019-04-10 05:20:00 -07:00
|
|
|
oldClient := sp.client
|
2016-02-28 10:21:50 -08:00
|
|
|
sp.client = client
|
2016-02-23 04:34:24 -08:00
|
|
|
|
2020-07-30 05:20:24 -07:00
|
|
|
targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit))
|
|
|
|
|
2016-02-23 05:37:25 -08:00
|
|
|
var (
|
2021-05-15 19:19:22 -07:00
|
|
|
wg sync.WaitGroup
|
|
|
|
interval = time.Duration(sp.config.ScrapeInterval)
|
|
|
|
timeout = time.Duration(sp.config.ScrapeTimeout)
|
|
|
|
bodySizeLimit = int64(sp.config.BodySizeLimit)
|
|
|
|
sampleLimit = int(sp.config.SampleLimit)
|
|
|
|
labelLimits = &labelLimits{
|
2021-05-06 01:56:21 -07:00
|
|
|
labelLimit: int(sp.config.LabelLimit),
|
|
|
|
labelNameLengthLimit: int(sp.config.LabelNameLengthLimit),
|
|
|
|
labelValueLengthLimit: int(sp.config.LabelValueLengthLimit),
|
|
|
|
}
|
2019-03-15 03:04:15 -07:00
|
|
|
honorLabels = sp.config.HonorLabels
|
|
|
|
honorTimestamps = sp.config.HonorTimestamps
|
|
|
|
mrc = sp.config.MetricRelabelConfigs
|
2016-02-23 05:37:25 -08:00
|
|
|
)
|
2016-02-23 04:34:24 -08:00
|
|
|
|
2020-10-26 07:46:20 -07:00
|
|
|
sp.targetMtx.Lock()
|
|
|
|
|
2020-07-30 05:20:24 -07:00
|
|
|
forcedErr := sp.refreshTargetLimitErr()
|
2016-02-23 05:37:25 -08:00
|
|
|
for fp, oldLoop := range sp.loops {
|
2020-01-22 04:13:47 -08:00
|
|
|
var cache *scrapeCache
|
|
|
|
if oc := oldLoop.getCache(); reuseCache && oc != nil {
|
2020-03-20 09:43:26 -07:00
|
|
|
oldLoop.disableEndOfRunStalenessMarkers()
|
2020-01-22 04:13:47 -08:00
|
|
|
cache = oc
|
|
|
|
} else {
|
|
|
|
cache = newScrapeCache()
|
|
|
|
}
|
2021-08-31 08:37:32 -07:00
|
|
|
|
2016-02-23 05:37:25 -08:00
|
|
|
var (
|
2018-09-26 02:20:56 -07:00
|
|
|
t = sp.activeTargets[fp]
|
2021-05-15 19:19:22 -07:00
|
|
|
s = &targetScraper{Target: t, client: sp.client, timeout: timeout, bodySizeLimit: bodySizeLimit}
|
2019-03-12 03:26:18 -07:00
|
|
|
newLoop = sp.newLoop(scrapeLoopOptions{
|
2019-03-15 03:04:15 -07:00
|
|
|
target: t,
|
|
|
|
scraper: s,
|
2021-05-06 01:56:21 -07:00
|
|
|
sampleLimit: sampleLimit,
|
|
|
|
labelLimits: labelLimits,
|
2019-03-15 03:04:15 -07:00
|
|
|
honorLabels: honorLabels,
|
|
|
|
honorTimestamps: honorTimestamps,
|
|
|
|
mrc: mrc,
|
2020-01-22 04:13:47 -08:00
|
|
|
cache: cache,
|
2021-08-31 08:37:32 -07:00
|
|
|
interval: interval,
|
|
|
|
timeout: timeout,
|
2019-03-12 03:26:18 -07:00
|
|
|
})
|
2016-02-23 05:37:25 -08:00
|
|
|
)
|
|
|
|
wg.Add(1)
|
2016-02-23 04:34:24 -08:00
|
|
|
|
2016-02-23 05:37:25 -08:00
|
|
|
go func(oldLoop, newLoop loop) {
|
|
|
|
oldLoop.stop()
|
|
|
|
wg.Done()
|
2016-02-23 04:34:24 -08:00
|
|
|
|
2020-07-30 05:20:24 -07:00
|
|
|
newLoop.setForcedError(forcedErr)
|
2021-08-31 08:37:32 -07:00
|
|
|
newLoop.run(nil)
|
2016-02-23 05:37:25 -08:00
|
|
|
}(oldLoop, newLoop)
|
|
|
|
|
|
|
|
sp.loops[fp] = newLoop
|
2016-02-23 04:34:24 -08:00
|
|
|
}
|
|
|
|
|
2020-10-26 07:46:20 -07:00
|
|
|
sp.targetMtx.Unlock()
|
|
|
|
|
2016-02-23 04:34:24 -08:00
|
|
|
wg.Wait()
|
2019-04-10 05:20:00 -07:00
|
|
|
oldClient.CloseIdleConnections()
|
2016-03-09 07:33:10 -08:00
|
|
|
targetReloadIntervalLength.WithLabelValues(interval.String()).Observe(
|
2016-07-07 06:24:35 -07:00
|
|
|
time.Since(start).Seconds(),
|
2016-03-09 07:33:10 -08:00
|
|
|
)
|
2019-02-13 05:24:22 -08:00
|
|
|
return nil
|
2016-02-23 04:34:24 -08:00
|
|
|
}
|
|
|
|
|
2016-11-22 03:48:30 -08:00
|
|
|
// Sync converts target groups into actual scrape targets and synchronizes
|
2018-04-09 07:18:25 -07:00
|
|
|
// the currently running scraper with the resulting set and returns all scraped and dropped targets.
|
2018-09-26 02:20:56 -07:00
|
|
|
func (sp *scrapePool) Sync(tgs []*targetgroup.Group) {
|
2020-11-12 08:06:25 -08:00
|
|
|
sp.mtx.Lock()
|
|
|
|
defer sp.mtx.Unlock()
|
2016-11-22 03:48:30 -08:00
|
|
|
start := time.Now()
|
|
|
|
|
2020-10-26 07:46:20 -07:00
|
|
|
sp.targetMtx.Lock()
|
2016-11-22 03:48:30 -08:00
|
|
|
var all []*Target
|
2018-01-04 06:13:31 -08:00
|
|
|
sp.droppedTargets = []*Target{}
|
2016-11-22 03:48:30 -08:00
|
|
|
for _, tg := range tgs {
|
2021-05-28 14:50:59 -07:00
|
|
|
targets, failures := targetsFromGroup(tg, sp.config)
|
|
|
|
for _, err := range failures {
|
|
|
|
level.Error(sp.logger).Log("msg", "Creating target failed", "err", err)
|
2016-11-22 03:48:30 -08:00
|
|
|
}
|
2021-05-28 14:50:59 -07:00
|
|
|
targetSyncFailed.WithLabelValues(sp.config.JobName).Add(float64(len(failures)))
|
2017-12-04 07:12:28 -08:00
|
|
|
for _, t := range targets {
|
|
|
|
if t.Labels().Len() > 0 {
|
|
|
|
all = append(all, t)
|
|
|
|
} else if t.DiscoveredLabels().Len() > 0 {
|
|
|
|
sp.droppedTargets = append(sp.droppedTargets, t)
|
|
|
|
}
|
|
|
|
}
|
2016-11-22 03:48:30 -08:00
|
|
|
}
|
2020-10-26 07:46:20 -07:00
|
|
|
sp.targetMtx.Unlock()
|
2016-11-22 03:48:30 -08:00
|
|
|
sp.sync(all)
|
|
|
|
|
|
|
|
targetSyncIntervalLength.WithLabelValues(sp.config.JobName).Observe(
|
|
|
|
time.Since(start).Seconds(),
|
|
|
|
)
|
|
|
|
targetScrapePoolSyncsCounter.WithLabelValues(sp.config.JobName).Inc()
|
|
|
|
}
|
|
|
|
|
2016-02-23 05:37:25 -08:00
|
|
|
// sync takes a list of potentially duplicated targets, deduplicates them, starts
|
|
|
|
// scrape loops for new targets, and stops scrape loops for disappeared targets.
|
|
|
|
// It returns after all stopped scrape loops terminated.
|
|
|
|
func (sp *scrapePool) sync(targets []*Target) {
|
2016-02-22 07:46:55 -08:00
|
|
|
var (
|
2021-05-15 19:19:22 -07:00
|
|
|
uniqueLoops = make(map[uint64]loop)
|
|
|
|
interval = time.Duration(sp.config.ScrapeInterval)
|
|
|
|
timeout = time.Duration(sp.config.ScrapeTimeout)
|
|
|
|
bodySizeLimit = int64(sp.config.BodySizeLimit)
|
|
|
|
sampleLimit = int(sp.config.SampleLimit)
|
|
|
|
labelLimits = &labelLimits{
|
2021-05-06 01:56:21 -07:00
|
|
|
labelLimit: int(sp.config.LabelLimit),
|
|
|
|
labelNameLengthLimit: int(sp.config.LabelNameLengthLimit),
|
|
|
|
labelValueLengthLimit: int(sp.config.LabelValueLengthLimit),
|
|
|
|
}
|
2019-03-15 03:04:15 -07:00
|
|
|
honorLabels = sp.config.HonorLabels
|
|
|
|
honorTimestamps = sp.config.HonorTimestamps
|
|
|
|
mrc = sp.config.MetricRelabelConfigs
|
2016-02-22 07:46:55 -08:00
|
|
|
)
|
|
|
|
|
2020-10-26 07:46:20 -07:00
|
|
|
sp.targetMtx.Lock()
|
2016-02-23 05:37:25 -08:00
|
|
|
for _, t := range targets {
|
2016-02-28 10:56:18 -08:00
|
|
|
hash := t.hash()
|
2016-02-23 05:37:25 -08:00
|
|
|
|
2018-09-26 02:20:56 -07:00
|
|
|
if _, ok := sp.activeTargets[hash]; !ok {
|
2021-08-31 08:37:32 -07:00
|
|
|
// The scrape interval and timeout labels are set to the config's values initially,
|
|
|
|
// so whether changed via relabeling or not, they'll exist and hold the correct values
|
|
|
|
// for every target.
|
|
|
|
var err error
|
|
|
|
interval, timeout, err = t.intervalAndTimeout(interval, timeout)
|
|
|
|
|
2021-05-15 19:19:22 -07:00
|
|
|
s := &targetScraper{Target: t, client: sp.client, timeout: timeout, bodySizeLimit: bodySizeLimit}
|
2019-03-12 03:26:18 -07:00
|
|
|
l := sp.newLoop(scrapeLoopOptions{
|
2019-03-15 03:04:15 -07:00
|
|
|
target: t,
|
|
|
|
scraper: s,
|
2021-05-06 01:56:21 -07:00
|
|
|
sampleLimit: sampleLimit,
|
|
|
|
labelLimits: labelLimits,
|
2019-03-15 03:04:15 -07:00
|
|
|
honorLabels: honorLabels,
|
|
|
|
honorTimestamps: honorTimestamps,
|
|
|
|
mrc: mrc,
|
2021-08-31 08:37:32 -07:00
|
|
|
interval: interval,
|
|
|
|
timeout: timeout,
|
2019-03-12 03:26:18 -07:00
|
|
|
})
|
2021-08-31 08:37:32 -07:00
|
|
|
if err != nil {
|
|
|
|
l.setForcedError(err)
|
|
|
|
}
|
2016-02-23 05:37:25 -08:00
|
|
|
|
2018-09-26 02:20:56 -07:00
|
|
|
sp.activeTargets[hash] = t
|
2016-02-28 10:56:18 -08:00
|
|
|
sp.loops[hash] = l
|
2016-02-23 05:37:25 -08:00
|
|
|
|
2020-07-30 05:20:24 -07:00
|
|
|
uniqueLoops[hash] = l
|
2018-02-07 02:29:27 -08:00
|
|
|
} else {
|
2020-07-30 05:20:24 -07:00
|
|
|
// This might be a duplicated target.
|
|
|
|
if _, ok := uniqueLoops[hash]; !ok {
|
|
|
|
uniqueLoops[hash] = nil
|
|
|
|
}
|
2018-02-07 02:29:27 -08:00
|
|
|
// Need to keep the most updated labels information
|
|
|
|
// for displaying it in the Service Discovery web page.
|
2018-09-26 02:20:56 -07:00
|
|
|
sp.activeTargets[hash].SetDiscoveredLabels(t.DiscoveredLabels())
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-02-23 05:37:25 -08:00
|
|
|
var wg sync.WaitGroup
|
2016-02-22 07:46:55 -08:00
|
|
|
|
2016-02-23 05:37:25 -08:00
|
|
|
// Stop and remove old targets and scraper loops.
|
2018-09-26 02:20:56 -07:00
|
|
|
for hash := range sp.activeTargets {
|
2020-07-30 05:20:24 -07:00
|
|
|
if _, ok := uniqueLoops[hash]; !ok {
|
2016-02-23 05:37:25 -08:00
|
|
|
wg.Add(1)
|
|
|
|
go func(l loop) {
|
|
|
|
l.stop()
|
2016-02-22 07:46:55 -08:00
|
|
|
wg.Done()
|
2016-02-28 10:56:18 -08:00
|
|
|
}(sp.loops[hash])
|
2016-02-23 05:37:25 -08:00
|
|
|
|
2016-02-28 10:56:18 -08:00
|
|
|
delete(sp.loops, hash)
|
2018-09-26 02:20:56 -07:00
|
|
|
delete(sp.activeTargets, hash)
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-26 07:46:20 -07:00
|
|
|
sp.targetMtx.Unlock()
|
|
|
|
|
2020-07-30 05:20:24 -07:00
|
|
|
targetScrapePoolTargetsAdded.WithLabelValues(sp.config.JobName).Set(float64(len(uniqueLoops)))
|
|
|
|
forcedErr := sp.refreshTargetLimitErr()
|
|
|
|
for _, l := range sp.loops {
|
|
|
|
l.setForcedError(forcedErr)
|
|
|
|
}
|
|
|
|
for _, l := range uniqueLoops {
|
|
|
|
if l != nil {
|
2021-08-31 08:37:32 -07:00
|
|
|
go l.run(nil)
|
2020-07-30 05:20:24 -07:00
|
|
|
}
|
|
|
|
}
|
2016-02-22 07:46:55 -08:00
|
|
|
// Wait for all potentially stopped scrapers to terminate.
|
|
|
|
// This covers the case of flapping targets. If the server is under high load, a new scraper
|
|
|
|
// may be active and tries to insert. The old scraper that didn't terminate yet could still
|
|
|
|
// be inserting a previous sample set.
|
|
|
|
wg.Wait()
|
|
|
|
}
|
|
|
|
|
2020-07-30 05:20:24 -07:00
|
|
|
// refreshTargetLimitErr returns an error that can be passed to the scrape loops
|
|
|
|
// if the number of targets exceeds the configured limit.
|
|
|
|
func (sp *scrapePool) refreshTargetLimitErr() error {
|
2021-07-27 03:48:55 -07:00
|
|
|
if sp.config == nil || sp.config.TargetLimit == 0 {
|
2020-07-30 05:20:24 -07:00
|
|
|
return nil
|
|
|
|
}
|
2021-07-27 03:48:55 -07:00
|
|
|
if l := len(sp.activeTargets); l > int(sp.config.TargetLimit) {
|
2020-07-30 05:20:24 -07:00
|
|
|
targetScrapePoolExceededTargetLimit.Inc()
|
2021-07-27 03:48:55 -07:00
|
|
|
return fmt.Errorf("target_limit exceeded (number of targets: %d, limit: %d)", l, sp.config.TargetLimit)
|
2020-07-30 05:20:24 -07:00
|
|
|
}
|
2021-07-27 03:48:55 -07:00
|
|
|
return nil
|
2020-07-30 05:20:24 -07:00
|
|
|
}
|
|
|
|
|
2021-05-06 01:56:21 -07:00
|
|
|
func verifyLabelLimits(lset labels.Labels, limits *labelLimits) error {
|
|
|
|
if limits == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
met := lset.Get(labels.MetricName)
|
|
|
|
if limits.labelLimit > 0 {
|
|
|
|
nbLabels := len(lset)
|
|
|
|
if nbLabels > int(limits.labelLimit) {
|
|
|
|
return fmt.Errorf("label_limit exceeded (metric: %.50s, number of label: %d, limit: %d)", met, nbLabels, limits.labelLimit)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if limits.labelNameLengthLimit == 0 && limits.labelValueLengthLimit == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, l := range lset {
|
|
|
|
if limits.labelNameLengthLimit > 0 {
|
|
|
|
nameLength := len(l.Name)
|
|
|
|
if nameLength > int(limits.labelNameLengthLimit) {
|
|
|
|
return fmt.Errorf("label_name_length_limit exceeded (metric: %.50s, label: %.50v, name length: %d, limit: %d)", met, l, nameLength, limits.labelNameLengthLimit)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if limits.labelValueLengthLimit > 0 {
|
|
|
|
valueLength := len(l.Value)
|
|
|
|
if valueLength > int(limits.labelValueLengthLimit) {
|
|
|
|
return fmt.Errorf("label_value_length_limit exceeded (metric: %.50s, label: %.50v, value length: %d, limit: %d)", met, l, valueLength, limits.labelValueLengthLimit)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-12-18 03:26:36 -08:00
|
|
|
func mutateSampleLabels(lset labels.Labels, target *Target, honor bool, rc []*relabel.Config) labels.Labels {
|
2017-09-08 05:34:45 -07:00
|
|
|
lb := labels.NewBuilder(lset)
|
2017-01-30 08:30:28 -08:00
|
|
|
|
2018-04-12 07:54:53 -07:00
|
|
|
if honor {
|
2017-09-08 05:34:45 -07:00
|
|
|
for _, l := range target.Labels() {
|
2018-02-14 09:03:58 -08:00
|
|
|
if !lset.Has(l.Name) {
|
2017-09-08 05:34:45 -07:00
|
|
|
lb.Set(l.Name, l.Value)
|
|
|
|
}
|
2017-07-04 05:55:33 -07:00
|
|
|
}
|
2017-09-08 05:34:45 -07:00
|
|
|
} else {
|
|
|
|
for _, l := range target.Labels() {
|
2019-08-13 03:19:17 -07:00
|
|
|
// existingValue will be empty if l.Name doesn't exist.
|
|
|
|
existingValue := lset.Get(l.Name)
|
2019-11-20 07:50:05 -08:00
|
|
|
if existingValue != "" {
|
|
|
|
lb.Set(model.ExportedLabelPrefix+l.Name, existingValue)
|
|
|
|
}
|
2019-08-13 03:19:17 -07:00
|
|
|
// It is now safe to set the target label.
|
2017-09-08 05:34:45 -07:00
|
|
|
lb.Set(l.Name, l.Value)
|
2017-01-30 08:30:28 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-09-08 05:34:45 -07:00
|
|
|
res := lb.Labels()
|
|
|
|
|
2018-04-12 07:54:53 -07:00
|
|
|
if len(rc) > 0 {
|
|
|
|
res = relabel.Process(res, rc...)
|
2016-02-28 00:51:02 -08:00
|
|
|
}
|
|
|
|
|
2017-09-08 05:34:45 -07:00
|
|
|
return res
|
|
|
|
}
|
|
|
|
|
2018-04-12 07:54:53 -07:00
|
|
|
func mutateReportSampleLabels(lset labels.Labels, target *Target) labels.Labels {
|
2017-09-08 05:34:45 -07:00
|
|
|
lb := labels.NewBuilder(lset)
|
|
|
|
|
|
|
|
for _, l := range target.Labels() {
|
2019-08-13 03:19:17 -07:00
|
|
|
lb.Set(model.ExportedLabelPrefix+l.Name, lset.Get(l.Name))
|
2017-09-08 05:34:45 -07:00
|
|
|
lb.Set(l.Name, l.Value)
|
2016-02-28 00:51:02 -08:00
|
|
|
}
|
2017-09-08 05:34:45 -07:00
|
|
|
|
|
|
|
return lb.Labels()
|
2016-02-28 00:51:02 -08:00
|
|
|
}
|
|
|
|
|
2017-09-08 05:34:45 -07:00
|
|
|
// appender returns an appender for ingested samples from the target.
|
2018-04-12 07:54:53 -07:00
|
|
|
func appender(app storage.Appender, limit int) storage.Appender {
|
2017-09-08 05:34:45 -07:00
|
|
|
app = &timeLimitAppender{
|
2016-12-30 12:35:35 -08:00
|
|
|
Appender: app,
|
2017-09-08 05:34:45 -07:00
|
|
|
maxTime: timestamp.FromTime(time.Now().Add(maxAheadTime)),
|
|
|
|
}
|
|
|
|
|
|
|
|
// The limit is applied after metrics are potentially dropped via relabeling.
|
2018-04-12 07:54:53 -07:00
|
|
|
if limit > 0 {
|
2017-09-08 05:34:45 -07:00
|
|
|
app = &limitAppender{
|
|
|
|
Appender: app,
|
2018-04-12 07:54:53 -07:00
|
|
|
limit: limit,
|
2017-09-08 05:34:45 -07:00
|
|
|
}
|
2016-02-28 00:51:02 -08:00
|
|
|
}
|
2017-09-08 05:34:45 -07:00
|
|
|
return app
|
2016-02-28 00:51:02 -08:00
|
|
|
}
|
|
|
|
|
2016-02-25 04:58:46 -08:00
|
|
|
// A scraper retrieves samples and accepts a status report at the end.
type scraper interface {
	// offset returns how long a scrape loop should wait before its first
	// scrape, given the scrape interval and a jitter seed.
	offset(interval time.Duration, jitterSeed uint64) time.Duration

	// scrape retrieves the samples and writes them to w. The returned string
	// is the content type of the retrieved data.
	scrape(ctx context.Context, w io.Writer) (string, error)

	// Report accepts the status of a scrape: when it started, how long it
	// took and the error it ended with, if any.
	Report(start time.Time, dur time.Duration, err error)
}
|
|
|
|
|
2016-02-28 10:21:50 -08:00
|
|
|
// targetScraper implements the scraper interface for a target.
type targetScraper struct {
	*Target

	// client executes the HTTP request built by scrape.
	client *http.Client
	// req is created lazily by the first call to scrape and reused afterwards.
	req *http.Request
	// timeout is advertised to the target in the
	// X-Prometheus-Scrape-Timeout-Seconds request header.
	timeout time.Duration

	// gzipr and buf are created on the first gzip-encoded response and reset
	// for every later one instead of being reallocated.
	gzipr *gzip.Reader
	buf   *bufio.Reader

	// bodySizeLimit bounds the number of body bytes copied per scrape. scrape
	// replaces a value <= 0 with math.MaxInt64.
	bodySizeLimit int64
}
|
|
|
|
|
2021-05-15 19:19:22 -07:00
|
|
|
var errBodySizeLimit = errors.New("body size limit exceeded")
|
|
|
|
|
2018-10-05 09:11:16 -07:00
|
|
|
const acceptHeader = `application/openmetrics-text; version=0.0.1,text/plain;version=0.0.4;q=0.5,*/*;q=0.1`
|
2017-01-15 08:33:07 -08:00
|
|
|
|
2017-02-28 05:59:33 -08:00
|
|
|
var userAgentHeader = fmt.Sprintf("Prometheus/%s", version.Version)
|
|
|
|
|
2018-10-04 06:52:03 -07:00
|
|
|
func (s *targetScraper) scrape(ctx context.Context, w io.Writer) (string, error) {
|
2017-02-22 04:00:51 -08:00
|
|
|
if s.req == nil {
|
|
|
|
req, err := http.NewRequest("GET", s.URL().String(), nil)
|
|
|
|
if err != nil {
|
2018-10-04 06:52:03 -07:00
|
|
|
return "", err
|
2017-02-22 04:00:51 -08:00
|
|
|
}
|
2017-09-22 09:06:43 -07:00
|
|
|
req.Header.Add("Accept", acceptHeader)
|
2017-02-22 04:00:51 -08:00
|
|
|
req.Header.Add("Accept-Encoding", "gzip")
|
2017-03-17 08:27:07 -07:00
|
|
|
req.Header.Set("User-Agent", userAgentHeader)
|
2021-06-18 00:38:12 -07:00
|
|
|
req.Header.Set("X-Prometheus-Scrape-Timeout-Seconds", strconv.FormatFloat(s.timeout.Seconds(), 'f', -1, 64))
|
2017-02-22 04:00:51 -08:00
|
|
|
|
|
|
|
s.req = req
|
2016-02-28 10:21:50 -08:00
|
|
|
}
|
2017-08-09 07:30:49 -07:00
|
|
|
|
2018-11-19 03:31:16 -08:00
|
|
|
resp, err := s.client.Do(s.req.WithContext(ctx))
|
2016-02-28 10:21:50 -08:00
|
|
|
if err != nil {
|
2018-10-04 06:52:03 -07:00
|
|
|
return "", err
|
2016-02-28 10:21:50 -08:00
|
|
|
}
|
2019-04-18 01:50:37 -07:00
|
|
|
defer func() {
|
|
|
|
io.Copy(ioutil.Discard, resp.Body)
|
|
|
|
resp.Body.Close()
|
|
|
|
}()
|
2016-02-28 10:21:50 -08:00
|
|
|
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
2019-03-25 16:01:12 -07:00
|
|
|
return "", errors.Errorf("server returned HTTP status %s", resp.Status)
|
2016-02-28 10:21:50 -08:00
|
|
|
}
|
|
|
|
|
2021-05-15 19:19:22 -07:00
|
|
|
if s.bodySizeLimit <= 0 {
|
|
|
|
s.bodySizeLimit = math.MaxInt64
|
|
|
|
}
|
2017-02-22 04:00:51 -08:00
|
|
|
if resp.Header.Get("Content-Encoding") != "gzip" {
|
2021-05-15 19:19:22 -07:00
|
|
|
n, err := io.Copy(w, io.LimitReader(resp.Body, s.bodySizeLimit))
|
2018-11-26 05:05:07 -08:00
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
2021-05-15 19:19:22 -07:00
|
|
|
if n >= s.bodySizeLimit {
|
|
|
|
targetScrapeExceededBodySizeLimit.Inc()
|
|
|
|
return "", errBodySizeLimit
|
|
|
|
}
|
2018-11-26 05:05:07 -08:00
|
|
|
return resp.Header.Get("Content-Type"), nil
|
2017-02-22 04:00:51 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
if s.gzipr == nil {
|
|
|
|
s.buf = bufio.NewReader(resp.Body)
|
|
|
|
s.gzipr, err = gzip.NewReader(s.buf)
|
|
|
|
if err != nil {
|
2018-10-04 06:52:03 -07:00
|
|
|
return "", err
|
2017-02-22 04:00:51 -08:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
s.buf.Reset(resp.Body)
|
2018-08-17 08:24:35 -07:00
|
|
|
if err = s.gzipr.Reset(s.buf); err != nil {
|
2018-10-04 06:52:03 -07:00
|
|
|
return "", err
|
2018-08-17 08:24:35 -07:00
|
|
|
}
|
2017-02-22 04:00:51 -08:00
|
|
|
}
|
|
|
|
|
2021-05-15 19:19:22 -07:00
|
|
|
n, err := io.Copy(w, io.LimitReader(s.gzipr, s.bodySizeLimit))
|
2017-02-22 04:00:51 -08:00
|
|
|
s.gzipr.Close()
|
2018-10-04 06:52:03 -07:00
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
2021-05-15 19:19:22 -07:00
|
|
|
if n >= s.bodySizeLimit {
|
|
|
|
targetScrapeExceededBodySizeLimit.Inc()
|
|
|
|
return "", errBodySizeLimit
|
|
|
|
}
|
2018-10-04 06:52:03 -07:00
|
|
|
return resp.Header.Get("Content-Type"), nil
|
2016-02-28 10:21:50 -08:00
|
|
|
}
|
|
|
|
|
2016-02-28 00:51:02 -08:00
|
|
|
// A loop can run and be stopped again. It must not be reused after it was stopped.
|
2016-02-22 07:46:55 -08:00
|
|
|
type loop interface {
|
2021-08-31 08:37:32 -07:00
|
|
|
run(errc chan<- error)
|
2020-07-30 05:20:24 -07:00
|
|
|
setForcedError(err error)
|
2016-02-22 07:46:55 -08:00
|
|
|
stop()
|
2020-01-22 04:13:47 -08:00
|
|
|
getCache() *scrapeCache
|
2020-03-20 09:43:26 -07:00
|
|
|
disableEndOfRunStalenessMarkers()
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
|
|
|
|
2017-09-15 02:08:51 -07:00
|
|
|
type cacheEntry struct {
|
2017-09-07 05:14:41 -07:00
|
|
|
ref uint64
|
2017-05-25 23:44:24 -07:00
|
|
|
lastIter uint64
|
2017-09-15 02:08:51 -07:00
|
|
|
hash uint64
|
|
|
|
lset labels.Labels
|
2017-04-13 10:07:23 -07:00
|
|
|
}
|
|
|
|
|
2016-02-22 07:46:55 -08:00
|
|
|
type scrapeLoop struct {
|
2019-03-15 03:04:15 -07:00
|
|
|
scraper scraper
|
|
|
|
l log.Logger
|
|
|
|
cache *scrapeCache
|
|
|
|
lastScrapeSize int
|
|
|
|
buffers *pool.Pool
|
|
|
|
jitterSeed uint64
|
|
|
|
honorTimestamps bool
|
2020-07-30 05:20:24 -07:00
|
|
|
forcedErr error
|
|
|
|
forcedErrMtx sync.Mutex
|
2021-05-06 01:56:21 -07:00
|
|
|
labelLimits *labelLimits
|
2021-08-31 08:37:32 -07:00
|
|
|
interval time.Duration
|
|
|
|
timeout time.Duration
|
2017-05-25 23:44:24 -07:00
|
|
|
|
2020-07-24 07:10:51 -07:00
|
|
|
appender func(ctx context.Context) storage.Appender
|
2017-09-08 05:34:45 -07:00
|
|
|
sampleMutator labelsMutator
|
|
|
|
reportSampleMutator labelsMutator
|
2016-02-22 07:46:55 -08:00
|
|
|
|
2019-08-28 06:55:09 -07:00
|
|
|
parentCtx context.Context
|
2017-05-26 01:44:48 -07:00
|
|
|
ctx context.Context
|
|
|
|
cancel func()
|
|
|
|
stopped chan struct{}
|
2020-03-20 09:43:26 -07:00
|
|
|
|
|
|
|
disabledEndOfRunStalenessMarkers bool
|
2017-05-26 01:44:48 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// scrapeCache tracks mappings of exposed metric strings to label sets and
|
|
|
|
// storage references. Additionally, it tracks staleness of series between
|
|
|
|
// scrapes.
|
|
|
|
type scrapeCache struct {
|
|
|
|
iter uint64 // Current scrape iteration.
|
|
|
|
|
2019-03-28 10:52:46 -07:00
|
|
|
// How many series and metadata entries there were at the last success.
|
|
|
|
successfulCount int
|
|
|
|
|
2017-09-15 02:08:51 -07:00
|
|
|
// Parsed string to an entry with information about the actual label set
|
|
|
|
// and its storage reference.
|
2018-05-18 00:32:11 -07:00
|
|
|
series map[string]*cacheEntry
|
2017-05-24 07:23:48 -07:00
|
|
|
|
2017-09-08 05:34:45 -07:00
|
|
|
// Cache of dropped metric strings and their iteration. The iteration must
|
|
|
|
// be a pointer so we can update it without setting a new entry with an unsafe
|
|
|
|
// string in addDropped().
|
2018-05-18 00:32:11 -07:00
|
|
|
droppedSeries map[string]*uint64
|
2017-09-08 05:34:45 -07:00
|
|
|
|
2017-05-24 07:23:48 -07:00
|
|
|
// seriesCur and seriesPrev store the labels of series that were seen
|
|
|
|
// in the current and previous scrape.
|
2017-05-24 08:05:42 -07:00
|
|
|
// We hold two maps and swap them out to save allocations.
|
2017-05-25 23:44:24 -07:00
|
|
|
seriesCur map[uint64]labels.Labels
|
|
|
|
seriesPrev map[uint64]labels.Labels
|
2018-05-18 00:32:11 -07:00
|
|
|
|
|
|
|
metaMtx sync.Mutex
|
|
|
|
metadata map[string]*metaEntry
|
|
|
|
}
|
|
|
|
|
|
|
|
// metaEntry holds meta information about a metric.
|
|
|
|
type metaEntry struct {
|
2018-06-05 03:30:19 -07:00
|
|
|
lastIter uint64 // Last scrape iteration the entry was observed at.
|
2018-05-18 00:32:11 -07:00
|
|
|
typ textparse.MetricType
|
|
|
|
help string
|
2018-10-05 09:11:16 -07:00
|
|
|
unit string
|
2017-05-26 01:44:48 -07:00
|
|
|
}
|
2017-01-15 08:33:07 -08:00
|
|
|
|
2020-01-29 03:13:18 -08:00
|
|
|
func (m *metaEntry) size() int {
|
|
|
|
// The attribute lastIter although part of the struct it is not metadata.
|
|
|
|
return len(m.help) + len(m.unit) + len(m.typ)
|
|
|
|
}
|
|
|
|
|
2017-05-26 01:44:48 -07:00
|
|
|
func newScrapeCache() *scrapeCache {
|
|
|
|
return &scrapeCache{
|
2018-05-18 00:32:11 -07:00
|
|
|
series: map[string]*cacheEntry{},
|
|
|
|
droppedSeries: map[string]*uint64{},
|
|
|
|
seriesCur: map[uint64]labels.Labels{},
|
|
|
|
seriesPrev: map[uint64]labels.Labels{},
|
|
|
|
metadata: map[string]*metaEntry{},
|
2017-05-26 01:44:48 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-28 10:52:46 -07:00
|
|
|
func (c *scrapeCache) iterDone(flushCache bool) {
|
|
|
|
c.metaMtx.Lock()
|
|
|
|
count := len(c.series) + len(c.droppedSeries) + len(c.metadata)
|
|
|
|
c.metaMtx.Unlock()
|
|
|
|
|
|
|
|
if flushCache {
|
|
|
|
c.successfulCount = count
|
|
|
|
} else if count > c.successfulCount*2+1000 {
|
|
|
|
// If a target had varying labels in scrapes that ultimately failed,
|
|
|
|
// the caches would grow indefinitely. Force a flush when this happens.
|
|
|
|
// We use the heuristic that this is a doubling of the cache size
|
|
|
|
// since the last scrape, and allow an additional 1000 in case
|
|
|
|
// initial scrapes all fail.
|
|
|
|
flushCache = true
|
|
|
|
targetScrapeCacheFlushForced.Inc()
|
|
|
|
}
|
|
|
|
|
|
|
|
if flushCache {
|
2019-03-28 10:07:14 -07:00
|
|
|
// All caches may grow over time through series churn
|
|
|
|
// or multiple string representations of the same metric. Clean up entries
|
|
|
|
// that haven't appeared in the last scrape.
|
|
|
|
for s, e := range c.series {
|
|
|
|
if c.iter != e.lastIter {
|
|
|
|
delete(c.series, s)
|
|
|
|
}
|
2017-05-26 01:44:48 -07:00
|
|
|
}
|
2019-03-28 10:07:14 -07:00
|
|
|
for s, iter := range c.droppedSeries {
|
|
|
|
if c.iter != *iter {
|
|
|
|
delete(c.droppedSeries, s)
|
|
|
|
}
|
2017-09-08 05:34:45 -07:00
|
|
|
}
|
2019-03-28 10:07:14 -07:00
|
|
|
c.metaMtx.Lock()
|
|
|
|
for m, e := range c.metadata {
|
|
|
|
// Keep metadata around for 10 scrapes after its metric disappeared.
|
|
|
|
if c.iter-e.lastIter > 10 {
|
|
|
|
delete(c.metadata, m)
|
|
|
|
}
|
2018-05-18 00:32:11 -07:00
|
|
|
}
|
2019-03-28 10:07:14 -07:00
|
|
|
c.metaMtx.Unlock()
|
|
|
|
|
|
|
|
c.iter++
|
2018-05-18 00:32:11 -07:00
|
|
|
}
|
2017-05-26 01:44:48 -07:00
|
|
|
|
|
|
|
// Swap current and previous series.
|
|
|
|
c.seriesPrev, c.seriesCur = c.seriesCur, c.seriesPrev
|
|
|
|
|
|
|
|
// We have to delete every single key in the map.
|
|
|
|
for k := range c.seriesCur {
|
|
|
|
delete(c.seriesCur, k)
|
|
|
|
}
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
|
|
|
|
2017-09-15 02:08:51 -07:00
|
|
|
func (c *scrapeCache) get(met string) (*cacheEntry, bool) {
|
2018-05-18 00:32:11 -07:00
|
|
|
e, ok := c.series[met]
|
2017-05-26 01:44:48 -07:00
|
|
|
if !ok {
|
2017-09-15 02:08:51 -07:00
|
|
|
return nil, false
|
2017-05-26 01:44:48 -07:00
|
|
|
}
|
|
|
|
e.lastIter = c.iter
|
2017-09-15 02:08:51 -07:00
|
|
|
return e, true
|
2017-05-26 01:44:48 -07:00
|
|
|
}
|
|
|
|
|
2017-09-07 05:14:41 -07:00
|
|
|
func (c *scrapeCache) addRef(met string, ref uint64, lset labels.Labels, hash uint64) {
|
|
|
|
if ref == 0 {
|
2017-06-25 23:56:40 -07:00
|
|
|
return
|
|
|
|
}
|
2018-05-18 00:32:11 -07:00
|
|
|
c.series[met] = &cacheEntry{ref: ref, lastIter: c.iter, lset: lset, hash: hash}
|
2017-05-26 01:44:48 -07:00
|
|
|
}
|
|
|
|
|
2017-09-08 05:34:45 -07:00
|
|
|
func (c *scrapeCache) addDropped(met string) {
|
|
|
|
iter := c.iter
|
2018-05-18 00:32:11 -07:00
|
|
|
c.droppedSeries[met] = &iter
|
2017-09-08 05:34:45 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
func (c *scrapeCache) getDropped(met string) bool {
|
2018-05-18 00:32:11 -07:00
|
|
|
iterp, ok := c.droppedSeries[met]
|
2017-09-08 05:34:45 -07:00
|
|
|
if ok {
|
|
|
|
*iterp = c.iter
|
|
|
|
}
|
|
|
|
return ok
|
|
|
|
}
|
|
|
|
|
2017-06-25 23:56:40 -07:00
|
|
|
// trackStaleness records lset under hash as a series seen in the current
// scrape, so that forEachStale does not report it.
func (c *scrapeCache) trackStaleness(hash uint64, lset labels.Labels) {
	c.seriesCur[hash] = lset
}
|
|
|
|
|
|
|
|
func (c *scrapeCache) forEachStale(f func(labels.Labels) bool) {
|
|
|
|
for h, lset := range c.seriesPrev {
|
|
|
|
if _, ok := c.seriesCur[h]; !ok {
|
|
|
|
if !f(lset) {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-05-18 00:32:11 -07:00
|
|
|
func (c *scrapeCache) setType(metric []byte, t textparse.MetricType) {
|
|
|
|
c.metaMtx.Lock()
|
|
|
|
|
|
|
|
e, ok := c.metadata[yoloString(metric)]
|
|
|
|
if !ok {
|
2018-10-05 09:11:16 -07:00
|
|
|
e = &metaEntry{typ: textparse.MetricTypeUnknown}
|
2018-05-18 00:32:11 -07:00
|
|
|
c.metadata[string(metric)] = e
|
|
|
|
}
|
|
|
|
e.typ = t
|
|
|
|
e.lastIter = c.iter
|
|
|
|
|
|
|
|
c.metaMtx.Unlock()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *scrapeCache) setHelp(metric, help []byte) {
|
|
|
|
c.metaMtx.Lock()
|
|
|
|
|
|
|
|
e, ok := c.metadata[yoloString(metric)]
|
|
|
|
if !ok {
|
2018-10-05 09:11:16 -07:00
|
|
|
e = &metaEntry{typ: textparse.MetricTypeUnknown}
|
2018-05-18 00:32:11 -07:00
|
|
|
c.metadata[string(metric)] = e
|
|
|
|
}
|
|
|
|
if e.help != yoloString(help) {
|
|
|
|
e.help = string(help)
|
|
|
|
}
|
|
|
|
e.lastIter = c.iter
|
|
|
|
|
|
|
|
c.metaMtx.Unlock()
|
|
|
|
}
|
|
|
|
|
2018-10-05 09:11:16 -07:00
|
|
|
func (c *scrapeCache) setUnit(metric, unit []byte) {
|
|
|
|
c.metaMtx.Lock()
|
|
|
|
|
|
|
|
e, ok := c.metadata[yoloString(metric)]
|
|
|
|
if !ok {
|
|
|
|
e = &metaEntry{typ: textparse.MetricTypeUnknown}
|
|
|
|
c.metadata[string(metric)] = e
|
|
|
|
}
|
|
|
|
if e.unit != yoloString(unit) {
|
|
|
|
e.unit = string(unit)
|
|
|
|
}
|
|
|
|
e.lastIter = c.iter
|
|
|
|
|
|
|
|
c.metaMtx.Unlock()
|
|
|
|
}
|
|
|
|
|
2019-12-04 07:18:27 -08:00
|
|
|
func (c *scrapeCache) GetMetadata(metric string) (MetricMetadata, bool) {
|
2018-05-18 00:32:11 -07:00
|
|
|
c.metaMtx.Lock()
|
|
|
|
defer c.metaMtx.Unlock()
|
|
|
|
|
|
|
|
m, ok := c.metadata[metric]
|
|
|
|
if !ok {
|
|
|
|
return MetricMetadata{}, false
|
|
|
|
}
|
|
|
|
return MetricMetadata{
|
|
|
|
Metric: metric,
|
|
|
|
Type: m.typ,
|
|
|
|
Help: m.help,
|
2018-10-05 09:11:16 -07:00
|
|
|
Unit: m.unit,
|
2018-05-18 00:32:11 -07:00
|
|
|
}, true
|
|
|
|
}
|
|
|
|
|
2019-12-04 07:18:27 -08:00
|
|
|
func (c *scrapeCache) ListMetadata() []MetricMetadata {
|
2018-05-18 00:32:11 -07:00
|
|
|
c.metaMtx.Lock()
|
|
|
|
defer c.metaMtx.Unlock()
|
|
|
|
|
2018-06-05 03:30:19 -07:00
|
|
|
res := make([]MetricMetadata, 0, len(c.metadata))
|
|
|
|
|
2018-05-18 00:32:11 -07:00
|
|
|
for m, e := range c.metadata {
|
|
|
|
res = append(res, MetricMetadata{
|
|
|
|
Metric: m,
|
|
|
|
Type: e.typ,
|
|
|
|
Help: e.help,
|
2018-10-05 09:11:16 -07:00
|
|
|
Unit: e.unit,
|
2018-05-18 00:32:11 -07:00
|
|
|
})
|
|
|
|
}
|
|
|
|
return res
|
|
|
|
}
|
|
|
|
|
2020-01-29 03:13:18 -08:00
|
|
|
// MetadataSize returns the size of the metadata cache.
|
|
|
|
func (c *scrapeCache) SizeMetadata() (s int) {
|
|
|
|
c.metaMtx.Lock()
|
|
|
|
defer c.metaMtx.Unlock()
|
|
|
|
for _, e := range c.metadata {
|
|
|
|
s += e.size()
|
|
|
|
}
|
|
|
|
|
|
|
|
return s
|
|
|
|
}
|
|
|
|
|
|
|
|
// MetadataLen returns the number of metadata entries in the cache.
|
|
|
|
func (c *scrapeCache) LengthMetadata() int {
|
|
|
|
c.metaMtx.Lock()
|
|
|
|
defer c.metaMtx.Unlock()
|
|
|
|
|
|
|
|
return len(c.metadata)
|
|
|
|
}
|
|
|
|
|
2017-11-26 07:15:15 -08:00
|
|
|
func newScrapeLoop(ctx context.Context,
|
2017-05-26 01:44:48 -07:00
|
|
|
sc scraper,
|
|
|
|
l log.Logger,
|
2018-02-13 12:44:51 -08:00
|
|
|
buffers *pool.Pool,
|
2017-09-08 05:34:45 -07:00
|
|
|
sampleMutator labelsMutator,
|
|
|
|
reportSampleMutator labelsMutator,
|
2020-07-24 07:10:51 -07:00
|
|
|
appender func(ctx context.Context) storage.Appender,
|
2018-05-18 00:32:11 -07:00
|
|
|
cache *scrapeCache,
|
2019-03-12 03:46:15 -07:00
|
|
|
jitterSeed uint64,
|
2019-03-15 03:04:15 -07:00
|
|
|
honorTimestamps bool,
|
2021-05-06 01:56:21 -07:00
|
|
|
labelLimits *labelLimits,
|
2021-08-31 08:37:32 -07:00
|
|
|
interval time.Duration,
|
|
|
|
timeout time.Duration,
|
2017-05-26 01:44:48 -07:00
|
|
|
) *scrapeLoop {
|
2017-05-16 06:04:37 -07:00
|
|
|
if l == nil {
|
2017-08-11 11:45:52 -07:00
|
|
|
l = log.NewNopLogger()
|
2017-05-16 06:04:37 -07:00
|
|
|
}
|
2017-09-07 05:43:21 -07:00
|
|
|
if buffers == nil {
|
2018-02-13 12:44:51 -08:00
|
|
|
buffers = pool.New(1e3, 1e6, 3, func(sz int) interface{} { return make([]byte, 0, sz) })
|
2017-09-07 05:43:21 -07:00
|
|
|
}
|
2018-05-18 00:32:11 -07:00
|
|
|
if cache == nil {
|
|
|
|
cache = newScrapeCache()
|
|
|
|
}
|
2016-02-22 07:46:55 -08:00
|
|
|
sl := &scrapeLoop{
|
2017-09-08 05:34:45 -07:00
|
|
|
scraper: sc,
|
|
|
|
buffers: buffers,
|
2018-05-18 00:32:11 -07:00
|
|
|
cache: cache,
|
2017-09-08 05:34:45 -07:00
|
|
|
appender: appender,
|
|
|
|
sampleMutator: sampleMutator,
|
|
|
|
reportSampleMutator: reportSampleMutator,
|
|
|
|
stopped: make(chan struct{}),
|
2019-03-12 03:46:15 -07:00
|
|
|
jitterSeed: jitterSeed,
|
2017-09-08 05:34:45 -07:00
|
|
|
l: l,
|
2019-08-28 06:55:09 -07:00
|
|
|
parentCtx: ctx,
|
2019-03-15 03:04:15 -07:00
|
|
|
honorTimestamps: honorTimestamps,
|
2021-05-06 01:56:21 -07:00
|
|
|
labelLimits: labelLimits,
|
2021-08-31 08:37:32 -07:00
|
|
|
interval: interval,
|
|
|
|
timeout: timeout,
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
2019-08-28 06:55:09 -07:00
|
|
|
sl.ctx, sl.cancel = context.WithCancel(ctx)
|
2016-02-22 07:46:55 -08:00
|
|
|
|
|
|
|
return sl
|
|
|
|
}
|
|
|
|
|
2021-08-31 08:37:32 -07:00
|
|
|
func (sl *scrapeLoop) run(errc chan<- error) {
|
2016-02-22 07:46:55 -08:00
|
|
|
select {
|
2021-08-31 08:37:32 -07:00
|
|
|
case <-time.After(sl.scraper.offset(sl.interval, sl.jitterSeed)):
|
2016-02-22 07:46:55 -08:00
|
|
|
// Continue after a scraping offset.
|
2019-08-28 06:55:09 -07:00
|
|
|
case <-sl.ctx.Done():
|
2017-05-10 08:59:02 -07:00
|
|
|
close(sl.stopped)
|
2016-02-22 07:46:55 -08:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
var last time.Time
|
|
|
|
|
2021-03-15 06:05:17 -07:00
|
|
|
alignedScrapeTime := time.Now().Round(0)
|
2021-08-31 08:37:32 -07:00
|
|
|
ticker := time.NewTicker(sl.interval)
|
2016-02-22 07:46:55 -08:00
|
|
|
defer ticker.Stop()
|
|
|
|
|
2017-05-10 08:59:02 -07:00
|
|
|
mainLoop:
|
2016-02-22 07:46:55 -08:00
|
|
|
for {
|
|
|
|
select {
|
2019-08-28 06:55:09 -07:00
|
|
|
case <-sl.parentCtx.Done():
|
2017-05-10 08:59:02 -07:00
|
|
|
close(sl.stopped)
|
2016-02-22 07:46:55 -08:00
|
|
|
return
|
2019-08-28 06:55:09 -07:00
|
|
|
case <-sl.ctx.Done():
|
2017-05-10 08:59:02 -07:00
|
|
|
break mainLoop
|
2016-02-22 07:46:55 -08:00
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
2020-10-05 09:17:50 -07:00
|
|
|
// Temporary workaround for a jitter in go timers that causes disk space
|
|
|
|
// increase in TSDB.
|
|
|
|
// See https://github.com/prometheus/prometheus/issues/7846
|
2021-03-15 06:05:17 -07:00
|
|
|
// Calling Round ensures the time used is the wall clock, as otherwise .Sub
|
|
|
|
// and .Add on time.Time behave differently (see time package docs).
|
|
|
|
scrapeTime := time.Now().Round(0)
|
2021-08-31 08:37:32 -07:00
|
|
|
if AlignScrapeTimestamps && sl.interval > 100*scrapeTimestampTolerance {
|
2020-10-06 04:48:24 -07:00
|
|
|
// For some reason, a tick might have been skipped, in which case we
|
2020-10-05 09:17:50 -07:00
|
|
|
// would call alignedScrapeTime.Add(interval) multiple times.
|
2021-08-31 08:37:32 -07:00
|
|
|
for scrapeTime.Sub(alignedScrapeTime) >= sl.interval {
|
|
|
|
alignedScrapeTime = alignedScrapeTime.Add(sl.interval)
|
2020-10-05 09:17:50 -07:00
|
|
|
}
|
|
|
|
// Align the scrape time if we are in the tolerance boundaries.
|
2020-10-07 09:25:52 -07:00
|
|
|
if scrapeTime.Sub(alignedScrapeTime) <= scrapeTimestampTolerance {
|
2020-10-05 09:17:50 -07:00
|
|
|
scrapeTime = alignedScrapeTime
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-08-31 08:37:32 -07:00
|
|
|
last = sl.scrapeAndReport(sl.interval, sl.timeout, last, scrapeTime, errc)
|
2016-02-22 09:49:26 -08:00
|
|
|
|
2020-07-16 04:53:39 -07:00
|
|
|
select {
|
|
|
|
case <-sl.parentCtx.Done():
|
|
|
|
close(sl.stopped)
|
|
|
|
return
|
|
|
|
case <-sl.ctx.Done():
|
|
|
|
break mainLoop
|
|
|
|
case <-ticker.C:
|
2016-12-29 00:27:30 -08:00
|
|
|
}
|
2020-07-16 04:53:39 -07:00
|
|
|
}
|
2018-02-20 03:32:23 -08:00
|
|
|
|
2020-07-16 04:53:39 -07:00
|
|
|
close(sl.stopped)
|
2016-02-22 07:46:55 -08:00
|
|
|
|
2020-07-16 04:53:39 -07:00
|
|
|
if !sl.disabledEndOfRunStalenessMarkers {
|
2021-08-31 08:37:32 -07:00
|
|
|
sl.endOfRunStaleness(last, ticker, sl.interval)
|
2020-07-16 04:53:39 -07:00
|
|
|
}
|
|
|
|
}
|
2017-09-07 05:43:21 -07:00
|
|
|
|
2020-07-16 04:53:39 -07:00
|
|
|
// scrapeAndReport performs a scrape and then appends the result to the storage
|
|
|
|
// together with reporting metrics, by using as few appenders as possible.
|
|
|
|
// In the happy scenario, a single appender is used.
|
2020-08-07 06:58:16 -07:00
|
|
|
// This function uses sl.parentCtx instead of sl.ctx on purpose. A scrape should
|
|
|
|
// only be cancelled on shutdown, not on reloads.
|
2020-10-05 09:17:50 -07:00
|
|
|
func (sl *scrapeLoop) scrapeAndReport(interval, timeout time.Duration, last, appendTime time.Time, errc chan<- error) time.Time {
|
2020-07-30 05:20:24 -07:00
|
|
|
start := time.Now()
|
2017-06-14 19:08:03 -07:00
|
|
|
|
2020-07-16 04:53:39 -07:00
|
|
|
// Only record after the first scrape.
|
|
|
|
if !last.IsZero() {
|
|
|
|
targetIntervalLength.WithLabelValues(interval.String()).Observe(
|
|
|
|
time.Since(last).Seconds(),
|
|
|
|
)
|
|
|
|
}
|
2016-02-22 07:46:55 -08:00
|
|
|
|
2020-07-16 04:53:39 -07:00
|
|
|
b := sl.buffers.Get(sl.lastScrapeSize).([]byte)
|
2020-07-31 10:11:08 -07:00
|
|
|
defer sl.buffers.Put(b)
|
2020-07-16 04:53:39 -07:00
|
|
|
buf := bytes.NewBuffer(b)
|
2017-09-07 05:43:21 -07:00
|
|
|
|
2020-07-30 05:20:24 -07:00
|
|
|
var total, added, seriesAdded int
|
|
|
|
var err, appErr, scrapeErr error
|
2020-07-31 10:11:08 -07:00
|
|
|
|
2020-08-07 06:58:16 -07:00
|
|
|
app := sl.appender(sl.parentCtx)
|
2020-07-16 04:53:39 -07:00
|
|
|
defer func() {
|
|
|
|
if err != nil {
|
|
|
|
app.Rollback()
|
2016-02-22 07:46:55 -08:00
|
|
|
return
|
2020-07-16 04:53:39 -07:00
|
|
|
}
|
|
|
|
err = app.Commit()
|
|
|
|
if err != nil {
|
|
|
|
level.Error(sl.l).Log("msg", "Scrape commit failed", "err", err)
|
|
|
|
}
|
|
|
|
}()
|
2020-07-31 10:11:08 -07:00
|
|
|
|
|
|
|
defer func() {
|
2020-10-05 09:17:50 -07:00
|
|
|
if err = sl.report(app, appendTime, time.Since(start), total, added, seriesAdded, scrapeErr); err != nil {
|
2020-07-31 10:11:08 -07:00
|
|
|
level.Warn(sl.l).Log("msg", "Appending scrape report failed", "err", err)
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2020-07-30 05:20:24 -07:00
|
|
|
if forcedErr := sl.getForcedError(); forcedErr != nil {
|
2020-07-31 10:11:08 -07:00
|
|
|
scrapeErr = forcedErr
|
2020-07-30 05:20:24 -07:00
|
|
|
// Add stale markers.
|
2020-10-05 09:17:50 -07:00
|
|
|
if _, _, _, err := sl.append(app, []byte{}, "", appendTime); err != nil {
|
2020-07-16 04:53:39 -07:00
|
|
|
app.Rollback()
|
2020-08-07 06:58:16 -07:00
|
|
|
app = sl.appender(sl.parentCtx)
|
2020-07-16 04:53:39 -07:00
|
|
|
level.Warn(sl.l).Log("msg", "Append failed", "err", err)
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
2020-07-30 05:20:24 -07:00
|
|
|
if errc != nil {
|
|
|
|
errc <- forcedErr
|
|
|
|
}
|
2020-07-31 10:11:08 -07:00
|
|
|
|
|
|
|
return start
|
|
|
|
}
|
|
|
|
|
|
|
|
var contentType string
|
2020-08-07 06:58:16 -07:00
|
|
|
scrapeCtx, cancel := context.WithTimeout(sl.parentCtx, timeout)
|
2020-07-31 10:11:08 -07:00
|
|
|
contentType, scrapeErr = sl.scraper.scrape(scrapeCtx, buf)
|
|
|
|
cancel()
|
|
|
|
|
|
|
|
if scrapeErr == nil {
|
|
|
|
b = buf.Bytes()
|
|
|
|
// NOTE: There were issues with misbehaving clients in the past
|
|
|
|
// that occasionally returned empty results. We don't want those
|
|
|
|
// to falsely reset our buffer size.
|
|
|
|
if len(b) > 0 {
|
|
|
|
sl.lastScrapeSize = len(b)
|
|
|
|
}
|
2020-07-30 05:20:24 -07:00
|
|
|
} else {
|
2020-08-01 01:56:21 -07:00
|
|
|
level.Debug(sl.l).Log("msg", "Scrape failed", "err", scrapeErr)
|
2020-07-31 10:11:08 -07:00
|
|
|
if errc != nil {
|
|
|
|
errc <- scrapeErr
|
2020-07-30 05:20:24 -07:00
|
|
|
}
|
2020-07-31 10:11:08 -07:00
|
|
|
}
|
2020-07-30 05:20:24 -07:00
|
|
|
|
2020-07-31 10:11:08 -07:00
|
|
|
// A failed scrape is the same as an empty scrape,
|
|
|
|
// we still call sl.append to trigger stale markers.
|
2020-10-05 09:17:50 -07:00
|
|
|
total, added, seriesAdded, appErr = sl.append(app, b, contentType, appendTime)
|
2020-07-31 10:11:08 -07:00
|
|
|
if appErr != nil {
|
|
|
|
app.Rollback()
|
2020-08-07 06:58:16 -07:00
|
|
|
app = sl.appender(sl.parentCtx)
|
2020-07-31 10:11:08 -07:00
|
|
|
level.Debug(sl.l).Log("msg", "Append failed", "err", appErr)
|
|
|
|
// The append failed, probably due to a parse error or sample limit.
|
|
|
|
// Call sl.append again with an empty scrape to trigger stale markers.
|
2020-10-05 09:17:50 -07:00
|
|
|
if _, _, _, err := sl.append(app, []byte{}, "", appendTime); err != nil {
|
2020-07-30 05:20:24 -07:00
|
|
|
app.Rollback()
|
2020-08-07 06:58:16 -07:00
|
|
|
app = sl.appender(sl.parentCtx)
|
2020-07-31 10:11:08 -07:00
|
|
|
level.Warn(sl.l).Log("msg", "Append failed", "err", err)
|
2020-07-30 05:20:24 -07:00
|
|
|
}
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
2017-05-10 08:59:02 -07:00
|
|
|
|
2020-07-16 04:53:39 -07:00
|
|
|
if scrapeErr == nil {
|
|
|
|
scrapeErr = appErr
|
|
|
|
}
|
|
|
|
|
|
|
|
return start
|
2017-05-16 05:12:21 -07:00
|
|
|
}
|
|
|
|
|
2020-07-30 05:20:24 -07:00
|
|
|
func (sl *scrapeLoop) setForcedError(err error) {
|
|
|
|
sl.forcedErrMtx.Lock()
|
|
|
|
defer sl.forcedErrMtx.Unlock()
|
|
|
|
sl.forcedErr = err
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sl *scrapeLoop) getForcedError() error {
|
|
|
|
sl.forcedErrMtx.Lock()
|
|
|
|
defer sl.forcedErrMtx.Unlock()
|
|
|
|
return sl.forcedErr
|
|
|
|
}
|
|
|
|
|
2017-05-16 05:12:21 -07:00
|
|
|
func (sl *scrapeLoop) endOfRunStaleness(last time.Time, ticker *time.Ticker, interval time.Duration) {
|
2017-05-10 08:59:02 -07:00
|
|
|
// Scraping has stopped. We want to write stale markers but
|
|
|
|
// the target may be recreated, so we wait just over 2 scrape intervals
|
|
|
|
// before creating them.
|
2018-11-27 08:44:29 -08:00
|
|
|
// If the context is canceled, we presume the server is shutting down
|
2017-05-10 08:59:02 -07:00
|
|
|
// and will restart where is was. We do not attempt to write stale markers
|
|
|
|
// in this case.
|
|
|
|
|
|
|
|
if last.IsZero() {
|
|
|
|
// There never was a scrape, so there will be no stale markers.
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Wait for when the next scrape would have been, record its timestamp.
|
|
|
|
var staleTime time.Time
|
|
|
|
select {
|
2019-08-28 06:55:09 -07:00
|
|
|
case <-sl.parentCtx.Done():
|
2017-05-10 08:59:02 -07:00
|
|
|
return
|
|
|
|
case <-ticker.C:
|
|
|
|
staleTime = time.Now()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Wait for when the next scrape would have been, if the target was recreated
|
|
|
|
// samples should have been ingested by now.
|
|
|
|
select {
|
2019-08-28 06:55:09 -07:00
|
|
|
case <-sl.parentCtx.Done():
|
2017-05-10 08:59:02 -07:00
|
|
|
return
|
|
|
|
case <-ticker.C:
|
|
|
|
}
|
|
|
|
|
|
|
|
// Wait for an extra 10% of the interval, just to be safe.
|
|
|
|
select {
|
2019-08-28 06:55:09 -07:00
|
|
|
case <-sl.parentCtx.Done():
|
2017-05-10 08:59:02 -07:00
|
|
|
return
|
|
|
|
case <-time.After(interval / 10):
|
|
|
|
}
|
|
|
|
|
|
|
|
// Call sl.append again with an empty scrape to trigger stale markers.
|
|
|
|
// If the target has since been recreated and scraped, the
|
|
|
|
// stale markers will be out of order and ignored.
|
2020-07-24 07:10:51 -07:00
|
|
|
app := sl.appender(sl.ctx)
|
2020-07-16 04:53:39 -07:00
|
|
|
var err error
|
|
|
|
defer func() {
|
|
|
|
if err != nil {
|
|
|
|
app.Rollback()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
err = app.Commit()
|
|
|
|
if err != nil {
|
|
|
|
level.Warn(sl.l).Log("msg", "Stale commit failed", "err", err)
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
if _, _, _, err = sl.append(app, []byte{}, "", staleTime); err != nil {
|
|
|
|
app.Rollback()
|
2020-07-24 07:10:51 -07:00
|
|
|
app = sl.appender(sl.ctx)
|
2020-07-16 04:53:39 -07:00
|
|
|
level.Warn(sl.l).Log("msg", "Stale append failed", "err", err)
|
2017-05-10 08:59:02 -07:00
|
|
|
}
|
2020-07-16 04:53:39 -07:00
|
|
|
if err = sl.reportStale(app, staleTime); err != nil {
|
|
|
|
level.Warn(sl.l).Log("msg", "Stale report failed", "err", err)
|
2017-05-11 06:43:43 -07:00
|
|
|
}
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
|
|
|
|
2017-05-10 08:59:02 -07:00
|
|
|
// Stop the scraping. May still write data and stale markers after it has
|
|
|
|
// returned. Cancel the context to stop all writes.
|
2016-02-22 07:46:55 -08:00
|
|
|
func (sl *scrapeLoop) stop() {
|
|
|
|
sl.cancel()
|
2017-05-10 08:59:02 -07:00
|
|
|
<-sl.stopped
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
|
|
|
|
2020-03-20 09:43:26 -07:00
|
|
|
func (sl *scrapeLoop) disableEndOfRunStalenessMarkers() {
|
|
|
|
sl.disabledEndOfRunStalenessMarkers = true
|
|
|
|
}
|
|
|
|
|
2020-01-22 04:13:47 -08:00
|
|
|
func (sl *scrapeLoop) getCache() *scrapeCache {
|
|
|
|
return sl.cache
|
|
|
|
}
|
|
|
|
|
2020-03-25 19:31:48 -07:00
|
|
|
// appendErrors counts the tolerated append failures seen during one call to
// scrapeLoop.append, so that each kind can be logged once afterwards.
type appendErrors struct {
	// numOutOfOrder counts samples rejected with storage.ErrOutOfOrderSample.
	numOutOfOrder int
	// numDuplicates counts samples rejected with
	// storage.ErrDuplicateSampleForTimestamp.
	numDuplicates int
	// numOutOfBounds counts samples rejected with storage.ErrOutOfBounds.
	numOutOfBounds int
	// numExemplarOutOfOrder counts exemplars rejected with
	// storage.ErrOutOfOrderExemplar.
	numExemplarOutOfOrder int
}
|
|
|
|
|
2020-07-16 04:53:39 -07:00
|
|
|
func (sl *scrapeLoop) append(app storage.Appender, b []byte, contentType string, ts time.Time) (total, added, seriesAdded int, err error) {
|
2016-04-25 07:43:52 -07:00
|
|
|
var (
|
2018-10-04 06:52:03 -07:00
|
|
|
p = textparse.New(b, contentType)
|
2017-07-04 02:24:13 -07:00
|
|
|
defTime = timestamp.FromTime(ts)
|
2020-03-25 19:31:48 -07:00
|
|
|
appErrs = appendErrors{}
|
|
|
|
sampleLimitErr error
|
2016-04-25 07:43:52 -07:00
|
|
|
)
|
2016-02-22 07:46:55 -08:00
|
|
|
|
2020-03-13 12:54:47 -07:00
|
|
|
defer func() {
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
// Only perform cache cleaning if the scrape was not empty.
|
|
|
|
// An empty scrape (usually) is used to indicate a failed scrape.
|
|
|
|
sl.cache.iterDone(len(b) > 0)
|
|
|
|
}()
|
|
|
|
|
2017-02-01 06:59:37 -08:00
|
|
|
loop:
|
2018-05-14 13:19:53 -07:00
|
|
|
for {
|
2020-03-25 19:31:48 -07:00
|
|
|
var (
|
|
|
|
et textparse.Entry
|
|
|
|
sampleAdded bool
|
2021-03-16 02:47:45 -07:00
|
|
|
e exemplar.Exemplar
|
2020-03-25 19:31:48 -07:00
|
|
|
)
|
2018-05-14 13:19:53 -07:00
|
|
|
if et, err = p.Next(); err != nil {
|
|
|
|
if err == io.EOF {
|
|
|
|
err = nil
|
|
|
|
}
|
|
|
|
break
|
|
|
|
}
|
2018-05-18 00:32:11 -07:00
|
|
|
switch et {
|
|
|
|
case textparse.EntryType:
|
|
|
|
sl.cache.setType(p.Type())
|
|
|
|
continue
|
|
|
|
case textparse.EntryHelp:
|
|
|
|
sl.cache.setHelp(p.Help())
|
2018-05-14 13:19:53 -07:00
|
|
|
continue
|
2018-10-05 09:11:16 -07:00
|
|
|
case textparse.EntryUnit:
|
|
|
|
sl.cache.setUnit(p.Unit())
|
|
|
|
continue
|
2018-05-18 00:32:11 -07:00
|
|
|
case textparse.EntryComment:
|
|
|
|
continue
|
|
|
|
default:
|
2018-05-14 13:19:53 -07:00
|
|
|
}
|
2017-01-30 08:30:28 -08:00
|
|
|
total++
|
|
|
|
|
2017-01-15 08:33:07 -08:00
|
|
|
t := defTime
|
2018-05-14 13:19:53 -07:00
|
|
|
met, tp, v := p.Series()
|
2019-03-15 03:04:15 -07:00
|
|
|
if !sl.honorTimestamps {
|
|
|
|
tp = nil
|
|
|
|
}
|
2017-01-15 08:33:07 -08:00
|
|
|
if tp != nil {
|
|
|
|
t = *tp
|
|
|
|
}
|
|
|
|
|
2017-09-08 05:34:45 -07:00
|
|
|
if sl.cache.getDropped(yoloString(met)) {
|
|
|
|
continue
|
|
|
|
}
|
2017-09-15 02:08:51 -07:00
|
|
|
ce, ok := sl.cache.get(yoloString(met))
|
2021-02-18 04:07:00 -08:00
|
|
|
var (
|
|
|
|
ref uint64
|
|
|
|
lset labels.Labels
|
|
|
|
mets string
|
|
|
|
hash uint64
|
|
|
|
)
|
2020-03-25 19:31:48 -07:00
|
|
|
|
2017-01-15 08:33:07 -08:00
|
|
|
if ok {
|
2021-02-18 04:07:00 -08:00
|
|
|
ref = ce.ref
|
|
|
|
lset = ce.lset
|
|
|
|
} else {
|
|
|
|
mets = p.Metric(&lset)
|
|
|
|
hash = lset.Hash()
|
2017-09-15 02:08:51 -07:00
|
|
|
|
|
|
|
// Hash label set as it is seen local to the target. Then add target labels
|
|
|
|
// and relabeling and store the final label set.
|
|
|
|
lset = sl.sampleMutator(lset)
|
|
|
|
|
|
|
|
// The label set may be set to nil to indicate dropping.
|
|
|
|
if lset == nil {
|
|
|
|
sl.cache.addDropped(mets)
|
|
|
|
continue
|
2017-06-25 23:56:40 -07:00
|
|
|
}
|
2017-01-30 08:30:28 -08:00
|
|
|
|
2020-03-01 23:18:05 -08:00
|
|
|
if !lset.Has(labels.MetricName) {
|
|
|
|
err = errNameLabelMandatory
|
|
|
|
break loop
|
|
|
|
}
|
2021-05-06 01:56:21 -07:00
|
|
|
|
|
|
|
// If any label limits is exceeded the scrape should fail.
|
|
|
|
if err = verifyLabelLimits(lset, sl.labelLimits); err != nil {
|
|
|
|
targetScrapePoolExceededLabelLimits.Inc()
|
|
|
|
break loop
|
|
|
|
}
|
2021-02-18 04:07:00 -08:00
|
|
|
}
|
2020-03-01 23:18:05 -08:00
|
|
|
|
2021-02-18 04:07:00 -08:00
|
|
|
ref, err = app.Append(ref, lset, t, v)
|
|
|
|
sampleAdded, err = sl.checkAddError(ce, met, tp, err, &sampleLimitErr, &appErrs)
|
|
|
|
if err != nil {
|
|
|
|
if err != storage.ErrNotFound {
|
|
|
|
level.Debug(sl.l).Log("msg", "Unexpected error", "series", string(met), "err", err)
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
2021-02-18 04:07:00 -08:00
|
|
|
break loop
|
|
|
|
}
|
2020-03-25 19:31:48 -07:00
|
|
|
|
2021-02-18 04:07:00 -08:00
|
|
|
if !ok {
|
2017-04-28 08:36:36 -07:00
|
|
|
if tp == nil {
|
|
|
|
// Bypass staleness logic if there is an explicit timestamp.
|
2017-06-25 23:56:40 -07:00
|
|
|
sl.cache.trackStaleness(hash, lset)
|
2017-04-28 08:36:36 -07:00
|
|
|
}
|
2017-06-25 23:56:40 -07:00
|
|
|
sl.cache.addRef(mets, ref, lset, hash)
|
2020-03-25 19:31:48 -07:00
|
|
|
if sampleAdded && sampleLimitErr == nil {
|
|
|
|
seriesAdded++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-04 08:00:37 -07:00
|
|
|
// Increment added even if there's an error so we correctly report the
|
2020-07-11 06:37:13 -07:00
|
|
|
// number of samples remaining after relabeling.
|
2020-06-04 08:00:37 -07:00
|
|
|
added++
|
|
|
|
|
2021-03-16 02:47:45 -07:00
|
|
|
if hasExemplar := p.Exemplar(&e); hasExemplar {
|
|
|
|
if !e.HasTs {
|
|
|
|
e.Ts = t
|
|
|
|
}
|
|
|
|
_, exemplarErr := app.AppendExemplar(ref, lset, e)
|
|
|
|
exemplarErr = sl.checkAddExemplarError(exemplarErr, e, &appErrs)
|
|
|
|
if exemplarErr != nil {
|
|
|
|
// Since exemplar storage is still experimental, we don't fail the scrape on ingestion errors.
|
|
|
|
level.Debug(sl.l).Log("msg", "Error while adding exemplar in AddExemplar", "exemplar", fmt.Sprintf("%+v", e), "err", exemplarErr)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
2018-01-09 07:43:28 -08:00
|
|
|
if sampleLimitErr != nil {
|
2018-05-14 13:19:53 -07:00
|
|
|
if err == nil {
|
|
|
|
err = sampleLimitErr
|
|
|
|
}
|
2018-01-09 07:43:28 -08:00
|
|
|
// We only want to increment this once per scrape, so this is Inc'd outside the loop.
|
2017-05-29 06:08:55 -07:00
|
|
|
targetScrapeSampleLimit.Inc()
|
|
|
|
}
|
2020-03-25 19:31:48 -07:00
|
|
|
if appErrs.numOutOfOrder > 0 {
|
|
|
|
level.Warn(sl.l).Log("msg", "Error on ingesting out-of-order samples", "num_dropped", appErrs.numOutOfOrder)
|
2017-05-16 05:30:40 -07:00
|
|
|
}
|
2020-03-25 19:31:48 -07:00
|
|
|
if appErrs.numDuplicates > 0 {
|
|
|
|
level.Warn(sl.l).Log("msg", "Error on ingesting samples with different value but same timestamp", "num_dropped", appErrs.numDuplicates)
|
2017-05-16 05:30:40 -07:00
|
|
|
}
|
2020-03-25 19:31:48 -07:00
|
|
|
if appErrs.numOutOfBounds > 0 {
|
|
|
|
level.Warn(sl.l).Log("msg", "Error on ingesting samples that are too old or are too far into the future", "num_dropped", appErrs.numOutOfBounds)
|
2017-07-04 02:24:13 -07:00
|
|
|
}
|
2021-03-16 02:47:45 -07:00
|
|
|
if appErrs.numExemplarOutOfOrder > 0 {
|
|
|
|
level.Warn(sl.l).Log("msg", "Error on ingesting out-of-order exemplars", "num_dropped", appErrs.numExemplarOutOfOrder)
|
|
|
|
}
|
2017-04-13 10:07:23 -07:00
|
|
|
if err == nil {
|
2017-05-26 01:44:48 -07:00
|
|
|
sl.cache.forEachStale(func(lset labels.Labels) bool {
|
|
|
|
// Series no longer exposed, mark it stale.
|
2021-02-18 04:07:00 -08:00
|
|
|
_, err = app.Append(0, lset, defTime, math.Float64frombits(value.StaleNaN))
|
2020-03-16 14:52:02 -07:00
|
|
|
switch errors.Cause(err) {
|
2017-05-26 01:44:48 -07:00
|
|
|
case storage.ErrOutOfOrderSample, storage.ErrDuplicateSampleForTimestamp:
|
|
|
|
// Do not count these in logging, as this is expected if a target
|
|
|
|
// goes away and comes back again with a new scrape loop.
|
|
|
|
err = nil
|
2017-04-13 10:07:23 -07:00
|
|
|
}
|
2017-05-26 01:44:48 -07:00
|
|
|
return err == nil
|
|
|
|
})
|
2017-04-13 10:07:23 -07:00
|
|
|
}
|
2020-03-13 12:54:47 -07:00
|
|
|
return
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
|
|
|
|
2017-02-22 04:00:51 -08:00
|
|
|
// yoloString reinterprets the slice header of b as a string header and
// returns the result, without copying the bytes. The returned string therefore
// shares b's backing memory.
func yoloString(b []byte) string {
	header := unsafe.Pointer(&b)
	return *(*string)(header)
}
|
|
|
|
|
2020-03-25 19:31:48 -07:00
|
|
|
// Adds samples to the appender, checking the error, and then returns the # of samples added,
|
|
|
|
// whether the caller should continue to process more samples, and any sample limit errors.
|
2020-05-26 07:14:55 -07:00
|
|
|
func (sl *scrapeLoop) checkAddError(ce *cacheEntry, met []byte, tp *int64, err error, sampleLimitErr *error, appErrs *appendErrors) (bool, error) {
|
2020-03-25 19:31:48 -07:00
|
|
|
switch errors.Cause(err) {
|
|
|
|
case nil:
|
|
|
|
if tp == nil && ce != nil {
|
|
|
|
sl.cache.trackStaleness(ce.hash, ce.lset)
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
case storage.ErrNotFound:
|
|
|
|
return false, storage.ErrNotFound
|
|
|
|
case storage.ErrOutOfOrderSample:
|
|
|
|
appErrs.numOutOfOrder++
|
|
|
|
level.Debug(sl.l).Log("msg", "Out of order sample", "series", string(met))
|
|
|
|
targetScrapeSampleOutOfOrder.Inc()
|
|
|
|
return false, nil
|
|
|
|
case storage.ErrDuplicateSampleForTimestamp:
|
|
|
|
appErrs.numDuplicates++
|
|
|
|
level.Debug(sl.l).Log("msg", "Duplicate sample for timestamp", "series", string(met))
|
|
|
|
targetScrapeSampleDuplicate.Inc()
|
|
|
|
return false, nil
|
|
|
|
case storage.ErrOutOfBounds:
|
|
|
|
appErrs.numOutOfBounds++
|
|
|
|
level.Debug(sl.l).Log("msg", "Out of bounds metric", "series", string(met))
|
|
|
|
targetScrapeSampleOutOfBounds.Inc()
|
|
|
|
return false, nil
|
|
|
|
case errSampleLimit:
|
|
|
|
// Keep on parsing output if we hit the limit, so we report the correct
|
|
|
|
// total number of samples scraped.
|
|
|
|
*sampleLimitErr = err
|
|
|
|
return false, nil
|
|
|
|
default:
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-16 02:47:45 -07:00
|
|
|
func (sl *scrapeLoop) checkAddExemplarError(err error, e exemplar.Exemplar, appErrs *appendErrors) error {
|
|
|
|
switch errors.Cause(err) {
|
|
|
|
case storage.ErrNotFound:
|
|
|
|
return storage.ErrNotFound
|
|
|
|
case storage.ErrOutOfOrderExemplar:
|
|
|
|
appErrs.numExemplarOutOfOrder++
|
|
|
|
level.Debug(sl.l).Log("msg", "Out of order exemplar", "exemplar", fmt.Sprintf("%+v", e))
|
|
|
|
targetScrapeExemplarOutOfOrder.Inc()
|
|
|
|
return nil
|
|
|
|
default:
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-09-15 02:08:51 -07:00
|
|
|
// Cache keys of the synthetic series written for every scrape. Each key is
// the metric name followed by the byte \xff, which cannot occur in valid
// UTF-8, so the keys never collide with scraped metrics in the cache.
const (
	scrapeHealthMetricName       = "up\xff"
	scrapeDurationMetricName     = "scrape_duration_seconds\xff"
	scrapeSamplesMetricName      = "scrape_samples_scraped\xff"
	samplesPostRelabelMetricName = "scrape_samples_post_metric_relabeling\xff"
	scrapeSeriesAddedMetricName  = "scrape_series_added\xff"
)
|
|
|
|
|
2020-07-16 04:53:39 -07:00
|
|
|
func (sl *scrapeLoop) report(app storage.Appender, start time.Time, duration time.Duration, scraped, added, seriesAdded int, scrapeErr error) (err error) {
|
2020-03-13 12:54:47 -07:00
|
|
|
sl.scraper.Report(start, duration, scrapeErr)
|
2016-02-22 07:46:55 -08:00
|
|
|
|
2017-01-15 08:33:07 -08:00
|
|
|
ts := timestamp.FromTime(start)
|
2016-02-22 07:46:55 -08:00
|
|
|
|
2016-12-29 00:27:30 -08:00
|
|
|
var health float64
|
2020-03-13 12:54:47 -07:00
|
|
|
if scrapeErr == nil {
|
2016-02-22 07:46:55 -08:00
|
|
|
health = 1
|
|
|
|
}
|
2017-01-13 05:48:01 -08:00
|
|
|
|
2020-03-13 12:54:47 -07:00
|
|
|
if err = sl.addReportSample(app, scrapeHealthMetricName, ts, health); err != nil {
|
|
|
|
return
|
2017-01-13 05:48:01 -08:00
|
|
|
}
|
2020-03-13 12:54:47 -07:00
|
|
|
if err = sl.addReportSample(app, scrapeDurationMetricName, ts, duration.Seconds()); err != nil {
|
|
|
|
return
|
2016-05-19 07:22:49 -07:00
|
|
|
}
|
2020-03-13 12:54:47 -07:00
|
|
|
if err = sl.addReportSample(app, scrapeSamplesMetricName, ts, float64(scraped)); err != nil {
|
|
|
|
return
|
2016-10-26 09:43:01 -07:00
|
|
|
}
|
2020-06-04 08:00:37 -07:00
|
|
|
if err = sl.addReportSample(app, samplesPostRelabelMetricName, ts, float64(added)); err != nil {
|
2020-03-13 12:54:47 -07:00
|
|
|
return
|
2017-01-15 08:33:07 -08:00
|
|
|
}
|
2020-03-13 12:54:47 -07:00
|
|
|
if err = sl.addReportSample(app, scrapeSeriesAddedMetricName, ts, float64(seriesAdded)); err != nil {
|
|
|
|
return
|
2019-05-08 14:24:00 -07:00
|
|
|
}
|
2020-03-13 12:54:47 -07:00
|
|
|
return
|
2017-01-15 08:33:07 -08:00
|
|
|
}
|
|
|
|
|
2020-07-16 04:53:39 -07:00
|
|
|
func (sl *scrapeLoop) reportStale(app storage.Appender, start time.Time) (err error) {
|
2017-05-11 06:43:43 -07:00
|
|
|
ts := timestamp.FromTime(start)
|
2017-09-08 05:34:45 -07:00
|
|
|
|
2017-05-11 06:43:43 -07:00
|
|
|
stale := math.Float64frombits(value.StaleNaN)
|
|
|
|
|
2020-03-13 12:54:47 -07:00
|
|
|
if err = sl.addReportSample(app, scrapeHealthMetricName, ts, stale); err != nil {
|
|
|
|
return
|
2017-05-11 06:43:43 -07:00
|
|
|
}
|
2020-03-13 12:54:47 -07:00
|
|
|
if err = sl.addReportSample(app, scrapeDurationMetricName, ts, stale); err != nil {
|
|
|
|
return
|
2017-05-11 06:43:43 -07:00
|
|
|
}
|
2020-03-13 12:54:47 -07:00
|
|
|
if err = sl.addReportSample(app, scrapeSamplesMetricName, ts, stale); err != nil {
|
|
|
|
return
|
2017-05-11 06:43:43 -07:00
|
|
|
}
|
2020-03-13 12:54:47 -07:00
|
|
|
if err = sl.addReportSample(app, samplesPostRelabelMetricName, ts, stale); err != nil {
|
|
|
|
return
|
2017-05-11 06:43:43 -07:00
|
|
|
}
|
2020-03-13 12:54:47 -07:00
|
|
|
if err = sl.addReportSample(app, scrapeSeriesAddedMetricName, ts, stale); err != nil {
|
|
|
|
return
|
2019-05-08 14:24:00 -07:00
|
|
|
}
|
2020-03-13 12:54:47 -07:00
|
|
|
return
|
2017-05-11 06:43:43 -07:00
|
|
|
}
|
|
|
|
|
2017-01-15 08:33:07 -08:00
|
|
|
func (sl *scrapeLoop) addReportSample(app storage.Appender, s string, t int64, v float64) error {
|
2017-09-15 02:08:51 -07:00
|
|
|
ce, ok := sl.cache.get(s)
|
2021-02-18 04:07:00 -08:00
|
|
|
var ref uint64
|
|
|
|
var lset labels.Labels
|
2017-01-15 08:33:07 -08:00
|
|
|
if ok {
|
2021-02-18 04:07:00 -08:00
|
|
|
ref = ce.ref
|
|
|
|
lset = ce.lset
|
|
|
|
} else {
|
|
|
|
lset = labels.Labels{
|
|
|
|
// The constants are suffixed with the invalid \xff unicode rune to avoid collisions
|
|
|
|
// with scraped metrics in the cache.
|
|
|
|
// We have to drop it when building the actual metric.
|
|
|
|
labels.Label{Name: labels.MetricName, Value: s[:len(s)-1]},
|
2017-01-15 08:33:07 -08:00
|
|
|
}
|
2021-02-18 04:07:00 -08:00
|
|
|
lset = sl.reportSampleMutator(lset)
|
2017-01-13 05:48:01 -08:00
|
|
|
}
|
2017-09-08 05:34:45 -07:00
|
|
|
|
2021-02-18 04:07:00 -08:00
|
|
|
ref, err := app.Append(ref, lset, t, v)
|
2020-03-16 14:52:02 -07:00
|
|
|
switch errors.Cause(err) {
|
2017-05-11 06:43:43 -07:00
|
|
|
case nil:
|
2021-02-18 04:07:00 -08:00
|
|
|
if !ok {
|
|
|
|
sl.cache.addRef(s, ref, lset, lset.Hash())
|
|
|
|
}
|
2017-05-11 06:43:43 -07:00
|
|
|
return nil
|
|
|
|
case storage.ErrOutOfOrderSample, storage.ErrDuplicateSampleForTimestamp:
|
2021-02-18 04:07:00 -08:00
|
|
|
// Do not log here, as this is expected if a target goes away and comes back
|
|
|
|
// again with a new scrape loop.
|
2017-05-11 06:43:43 -07:00
|
|
|
return nil
|
|
|
|
default:
|
2017-01-15 08:33:07 -08:00
|
|
|
return err
|
2017-01-13 05:48:01 -08:00
|
|
|
}
|
2016-02-22 07:46:55 -08:00
|
|
|
}
|
2020-01-22 04:13:47 -08:00
|
|
|
|
|
|
|
// zeroConfig returns a new scrape config that only contains configuration items
|
|
|
|
// that alter metrics.
|
|
|
|
func zeroConfig(c *config.ScrapeConfig) *config.ScrapeConfig {
|
|
|
|
z := *c
|
|
|
|
// We zero out the fields that for sure don't affect scrape.
|
|
|
|
z.ScrapeInterval = 0
|
|
|
|
z.ScrapeTimeout = 0
|
|
|
|
z.SampleLimit = 0
|
2021-05-06 01:56:21 -07:00
|
|
|
z.LabelLimit = 0
|
|
|
|
z.LabelNameLengthLimit = 0
|
|
|
|
z.LabelValueLengthLimit = 0
|
2020-01-22 04:13:47 -08:00
|
|
|
z.HTTPClientConfig = config_util.HTTPClientConfig{}
|
|
|
|
return &z
|
|
|
|
}
|
|
|
|
|
2020-03-02 00:21:24 -08:00
|
|
|
// reusableCache compares two scrape config and tells whether the cache is still
|
2020-01-22 04:13:47 -08:00
|
|
|
// valid.
|
|
|
|
func reusableCache(r, l *config.ScrapeConfig) bool {
|
|
|
|
if r == nil || l == nil {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
return reflect.DeepEqual(zeroConfig(r), zeroConfig(l))
|
|
|
|
}
|