prometheus/scrape/metrics.go

345 lines
12 KiB
Go
Raw Permalink Normal View History

// Copyright 2016 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scrape
import (
"fmt"
"github.com/prometheus/client_golang/prometheus"
)
type scrapeMetrics struct {
reg prometheus.Registerer
// Used by Manager.
targetMetadataCache *MetadataMetricsCollector
targetScrapePools prometheus.Counter
targetScrapePoolsFailed prometheus.Counter
// Used by scrapePool.
targetReloadIntervalLength *prometheus.SummaryVec
targetScrapePoolReloads prometheus.Counter
targetScrapePoolReloadsFailed prometheus.Counter
targetScrapePoolSyncsCounter *prometheus.CounterVec
targetScrapePoolExceededTargetLimit prometheus.Counter
targetScrapePoolTargetLimit *prometheus.GaugeVec
targetScrapePoolTargetsAdded *prometheus.GaugeVec
targetScrapePoolSymbolTableItems *prometheus.GaugeVec
targetSyncIntervalLength *prometheus.SummaryVec
targetSyncFailed *prometheus.CounterVec
// Used by targetScraper.
targetScrapeExceededBodySizeLimit prometheus.Counter
// Used by scrapeCache.
targetScrapeCacheFlushForced prometheus.Counter
// Used by scrapeLoop.
targetIntervalLength *prometheus.SummaryVec
targetScrapeSampleLimit prometheus.Counter
targetScrapeSampleDuplicate prometheus.Counter
targetScrapeSampleOutOfOrder prometheus.Counter
targetScrapeSampleOutOfBounds prometheus.Counter
targetScrapeExemplarOutOfOrder prometheus.Counter
targetScrapePoolExceededLabelLimits prometheus.Counter
targetScrapeNativeHistogramBucketLimit prometheus.Counter
}
func newScrapeMetrics(reg prometheus.Registerer) (*scrapeMetrics, error) {
sm := &scrapeMetrics{reg: reg}
// Manager metrics.
sm.targetMetadataCache = &MetadataMetricsCollector{
CacheEntries: prometheus.NewDesc(
"prometheus_target_metadata_cache_entries",
"Total number of metric metadata entries in the cache",
[]string{"scrape_job"},
nil,
),
CacheBytes: prometheus.NewDesc(
"prometheus_target_metadata_cache_bytes",
"The number of bytes that are currently used for storing metric metadata in the cache",
[]string{"scrape_job"},
nil,
),
// TargetsGatherer should be set later, because it's a circular dependency.
// newScrapeMetrics() is called by NewManager(), while also TargetsGatherer is the new Manager.
}
sm.targetScrapePools = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrape_pools_total",
Help: "Total number of scrape pool creation attempts.",
},
)
sm.targetScrapePoolsFailed = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrape_pools_failed_total",
Help: "Total number of scrape pool creations that failed.",
},
)
// Used by scrapePool.
sm.targetReloadIntervalLength = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Name: "prometheus_target_reload_length_seconds",
Help: "Actual interval to reload the scrape pool with a given configuration.",
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
},
[]string{"interval"},
)
sm.targetScrapePoolReloads = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrape_pool_reloads_total",
Help: "Total number of scrape pool reloads.",
},
)
sm.targetScrapePoolReloadsFailed = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrape_pool_reloads_failed_total",
Help: "Total number of failed scrape pool reloads.",
},
)
sm.targetScrapePoolExceededTargetLimit = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrape_pool_exceeded_target_limit_total",
Help: "Total number of times scrape pools hit the target limit, during sync or config reload.",
},
)
sm.targetScrapePoolTargetLimit = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "prometheus_target_scrape_pool_target_limit",
Help: "Maximum number of targets allowed in this scrape pool.",
},
[]string{"scrape_job"},
)
sm.targetScrapePoolTargetsAdded = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "prometheus_target_scrape_pool_targets",
Help: "Current number of targets in this scrape pool.",
},
[]string{"scrape_job"},
)
sm.targetScrapePoolSymbolTableItems = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "prometheus_target_scrape_pool_symboltable_items",
Help: "Current number of symbols in table for this scrape pool.",
},
[]string{"scrape_job"},
)
sm.targetScrapePoolSyncsCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "prometheus_target_scrape_pool_sync_total",
Help: "Total number of syncs that were executed on a scrape pool.",
},
[]string{"scrape_job"},
)
sm.targetSyncIntervalLength = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Name: "prometheus_target_sync_length_seconds",
Help: "Actual interval to sync the scrape pool.",
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
},
[]string{"scrape_job"},
)
sm.targetSyncFailed = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "prometheus_target_sync_failed_total",
Help: "Total number of target sync failures.",
},
[]string{"scrape_job"},
)
// Used by targetScraper.
sm.targetScrapeExceededBodySizeLimit = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrapes_exceeded_body_size_limit_total",
Help: "Total number of scrapes that hit the body size limit",
},
)
// Used by scrapeCache.
sm.targetScrapeCacheFlushForced = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrapes_cache_flush_forced_total",
Help: "How many times a scrape cache was flushed due to getting big while scrapes are failing.",
},
)
// Used by scrapeLoop.
sm.targetIntervalLength = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Name: "prometheus_target_interval_length_seconds",
Help: "Actual intervals between scrapes.",
Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
},
[]string{"interval"},
)
sm.targetScrapeSampleLimit = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrapes_exceeded_sample_limit_total",
Help: "Total number of scrapes that hit the sample limit and were rejected.",
},
)
sm.targetScrapeSampleDuplicate = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrapes_sample_duplicate_timestamp_total",
Help: "Total number of samples rejected due to duplicate timestamps but different values.",
},
)
sm.targetScrapeSampleOutOfOrder = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrapes_sample_out_of_order_total",
Help: "Total number of samples rejected due to not being out of the expected order.",
},
)
sm.targetScrapeSampleOutOfBounds = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrapes_sample_out_of_bounds_total",
Help: "Total number of samples rejected due to timestamp falling outside of the time bounds.",
},
)
sm.targetScrapePoolExceededLabelLimits = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrape_pool_exceeded_label_limits_total",
Help: "Total number of times scrape pools hit the label limits, during sync or config reload.",
},
)
sm.targetScrapeNativeHistogramBucketLimit = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrapes_exceeded_native_histogram_bucket_limit_total",
Help: "Total number of scrapes that hit the native histogram bucket limit and were rejected.",
},
)
sm.targetScrapeExemplarOutOfOrder = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_target_scrapes_exemplar_out_of_order_total",
Help: "Total number of exemplar rejected due to not being out of the expected order.",
},
)
for _, collector := range []prometheus.Collector{
// Used by Manager.
sm.targetMetadataCache,
sm.targetScrapePools,
sm.targetScrapePoolsFailed,
// Used by scrapePool.
sm.targetReloadIntervalLength,
sm.targetScrapePoolReloads,
sm.targetScrapePoolReloadsFailed,
sm.targetSyncIntervalLength,
sm.targetScrapePoolSyncsCounter,
sm.targetScrapePoolExceededTargetLimit,
sm.targetScrapePoolTargetLimit,
sm.targetScrapePoolTargetsAdded,
sm.targetScrapePoolSymbolTableItems,
sm.targetSyncFailed,
// Used by targetScraper.
sm.targetScrapeExceededBodySizeLimit,
// Used by scrapeCache.
sm.targetScrapeCacheFlushForced,
// Used by scrapeLoop.
sm.targetIntervalLength,
sm.targetScrapeSampleLimit,
sm.targetScrapeSampleDuplicate,
sm.targetScrapeSampleOutOfOrder,
sm.targetScrapeSampleOutOfBounds,
sm.targetScrapeExemplarOutOfOrder,
sm.targetScrapePoolExceededLabelLimits,
sm.targetScrapeNativeHistogramBucketLimit,
} {
err := reg.Register(collector)
if err != nil {
return nil, fmt.Errorf("failed to register scrape metrics: %w", err)
}
}
return sm, nil
}
func (sm *scrapeMetrics) setTargetMetadataCacheGatherer(gatherer TargetsGatherer) {
sm.targetMetadataCache.TargetsGatherer = gatherer
}
// Unregister unregisters all metrics.
func (sm *scrapeMetrics) Unregister() {
sm.reg.Unregister(sm.targetMetadataCache)
sm.reg.Unregister(sm.targetScrapePools)
sm.reg.Unregister(sm.targetScrapePoolsFailed)
sm.reg.Unregister(sm.targetReloadIntervalLength)
sm.reg.Unregister(sm.targetScrapePoolReloads)
sm.reg.Unregister(sm.targetScrapePoolReloadsFailed)
sm.reg.Unregister(sm.targetSyncIntervalLength)
sm.reg.Unregister(sm.targetScrapePoolSyncsCounter)
sm.reg.Unregister(sm.targetScrapePoolExceededTargetLimit)
sm.reg.Unregister(sm.targetScrapePoolTargetLimit)
sm.reg.Unregister(sm.targetScrapePoolTargetsAdded)
sm.reg.Unregister(sm.targetScrapePoolSymbolTableItems)
sm.reg.Unregister(sm.targetSyncFailed)
sm.reg.Unregister(sm.targetScrapeExceededBodySizeLimit)
sm.reg.Unregister(sm.targetScrapeCacheFlushForced)
sm.reg.Unregister(sm.targetIntervalLength)
sm.reg.Unregister(sm.targetScrapeSampleLimit)
sm.reg.Unregister(sm.targetScrapeSampleDuplicate)
sm.reg.Unregister(sm.targetScrapeSampleOutOfOrder)
sm.reg.Unregister(sm.targetScrapeSampleOutOfBounds)
sm.reg.Unregister(sm.targetScrapeExemplarOutOfOrder)
sm.reg.Unregister(sm.targetScrapePoolExceededLabelLimits)
sm.reg.Unregister(sm.targetScrapeNativeHistogramBucketLimit)
}
type TargetsGatherer interface {
TargetsActive() map[string][]*Target
}
// MetadataMetricsCollector is a Custom Collector for the metadata cache metrics.
type MetadataMetricsCollector struct {
CacheEntries *prometheus.Desc
CacheBytes *prometheus.Desc
TargetsGatherer TargetsGatherer
}
// Describe sends the metrics descriptions to the channel.
func (mc *MetadataMetricsCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- mc.CacheEntries
ch <- mc.CacheBytes
}
// Collect creates and sends the metrics for the metadata cache.
func (mc *MetadataMetricsCollector) Collect(ch chan<- prometheus.Metric) {
if mc.TargetsGatherer == nil {
return
}
for tset, targets := range mc.TargetsGatherer.TargetsActive() {
var size, length int
for _, t := range targets {
size += t.SizeMetadata()
length += t.LengthMetadata()
}
ch <- prometheus.MustNewConstMetric(
mc.CacheEntries,
prometheus.GaugeValue,
float64(length),
tset,
)
ch <- prometheus.MustNewConstMetric(
mc.CacheBytes,
prometheus.GaugeValue,
float64(size),
tset,
)
}
}