diff --git a/config/config.go b/config/config.go index 56350e1be3..e4045eb05f 100644 --- a/config/config.go +++ b/config/config.go @@ -382,11 +382,21 @@ type ScrapeConfig struct { MetricsPath string `yaml:"metrics_path,omitempty"` // The URL scheme with which to fetch metrics from targets. Scheme string `yaml:"scheme,omitempty"` - // More than this many samples post metric-relabeling will cause the scrape to fail. + // More than this many samples post metric-relabeling will cause the scrape to + // fail. SampleLimit uint `yaml:"sample_limit,omitempty"` // More than this many targets after the target relabeling will cause the // scrapes to fail. TargetLimit uint `yaml:"target_limit,omitempty"` + // More than this many labels post metric-relabeling will cause the scrape to + // fail. + LabelLimit uint `yaml:"label_limit,omitempty"` + // A label name longer than this post metric-relabeling will cause the + // scrape to fail. + LabelNameLengthLimit uint `yaml:"label_name_length_limit,omitempty"` + // A label value longer than this post metric-relabeling will cause the + // scrape to fail. + LabelValueLengthLimit uint `yaml:"label_value_length_limit,omitempty"` // We cannot do proper Go type embedding below as the parser will then parse // values arbitrarily into the overflow maps of further-down types. diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 72135a2702..ac45fa82f2 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -288,6 +288,21 @@ metric_relabel_configs: # the entire scrape will be treated as failed. 0 means no limit. [ sample_limit: <int> | default = 0 ] +# Per-scrape limit on number of labels that will be accepted for a sample. If +# more than this number of labels are present post metric-relabeling, the +# entire scrape will be treated as failed. 0 means no limit.
+[ label_limit: <int> | default = 0 ] + +# Per-scrape limit on the length of label names that will be accepted for a sample. +# If a label name is longer than this number post metric-relabeling, the entire +# scrape will be treated as failed. 0 means no limit. +[ label_name_length_limit: <int> | default = 0 ] + +# Per-scrape limit on the length of label values that will be accepted for a sample. +# If a label value is longer than this number post metric-relabeling, the +# entire scrape will be treated as failed. 0 means no limit. +[ label_value_length_limit: <int> | default = 0 ] + # Per-scrape config limit on number of unique targets that will be # accepted. If more than this number of targets are present after target # relabeling, Prometheus will mark the targets as failed without scraping them. diff --git a/documentation/prometheus-mixin/alerts.libsonnet b/documentation/prometheus-mixin/alerts.libsonnet index f555a4cdfe..d4face5772 100644 --- a/documentation/prometheus-mixin/alerts.libsonnet +++ b/documentation/prometheus-mixin/alerts.libsonnet @@ -261,6 +261,20 @@ description: 'Prometheus %(prometheusName)s has dropped {{ printf "%%.0f" $value }} targets because the number of targets exceeded the configured target_limit.' % $._config, }, }, + { + alert: 'PrometheusLabelLimitHit', + expr: ||| + increase(prometheus_target_scrape_pool_exceeded_label_limits_total{%(prometheusSelector)s}[5m]) > 0 + ||| % $._config, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + summary: 'Prometheus has dropped targets because some scrape configs have exceeded the labels limit.', + description: 'Prometheus %(prometheusName)s has dropped {{ printf "%%.0f" $value }} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit.'
% $._config, + }, + }, ] + if $._config.prometheusHAGroupLabels == '' then self.rulesWithoutHA else self.rulesWithHA, rulesWithoutHA:: [ { diff --git a/scrape/scrape.go b/scrape/scrape.go index f7888f3ee6..0985d2f465 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -170,6 +170,12 @@ var ( Help: "Total number of exemplar rejected due to not being out of the expected order.", }, ) + targetScrapePoolExceededLabelLimits = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_target_scrape_pool_exceeded_label_limits_total", + Help: "Total number of times scrape pools hit the label limits, during sync or config reload.", + }, + ) ) func init() { @@ -192,6 +198,7 @@ func init() { targetScrapeCacheFlushForced, targetMetadataCache, targetScrapeExemplarOutOfOrder, + targetScrapePoolExceededLabelLimits, ) } @@ -218,10 +225,17 @@ type scrapePool struct { newLoop func(scrapeLoopOptions) loop } +type labelLimits struct { + labelLimit int + labelNameLengthLimit int + labelValueLengthLimit int +} + type scrapeLoopOptions struct { target *Target scraper scraper - limit int + sampleLimit int + labelLimits *labelLimits honorLabels bool honorTimestamps bool mrc []*relabel.Config @@ -273,10 +287,11 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, jitterSeed return mutateSampleLabels(l, opts.target, opts.honorLabels, opts.mrc) }, func(l labels.Labels) labels.Labels { return mutateReportSampleLabels(l, opts.target) }, - func(ctx context.Context) storage.Appender { return appender(app.Appender(ctx), opts.limit) }, + func(ctx context.Context) storage.Appender { return appender(app.Appender(ctx), opts.sampleLimit) }, cache, jitterSeed, opts.honorTimestamps, + opts.labelLimits, ) } @@ -357,10 +372,15 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error { targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit)) var ( - wg sync.WaitGroup - interval = time.Duration(sp.config.ScrapeInterval) - 
timeout = time.Duration(sp.config.ScrapeTimeout) - limit = int(sp.config.SampleLimit) + wg sync.WaitGroup + interval = time.Duration(sp.config.ScrapeInterval) + timeout = time.Duration(sp.config.ScrapeTimeout) + sampleLimit = int(sp.config.SampleLimit) + labelLimits = &labelLimits{ + labelLimit: int(sp.config.LabelLimit), + labelNameLengthLimit: int(sp.config.LabelNameLengthLimit), + labelValueLengthLimit: int(sp.config.LabelValueLengthLimit), + } honorLabels = sp.config.HonorLabels honorTimestamps = sp.config.HonorTimestamps mrc = sp.config.MetricRelabelConfigs @@ -383,7 +403,8 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error { newLoop = sp.newLoop(scrapeLoopOptions{ target: t, scraper: s, - limit: limit, + sampleLimit: sampleLimit, + labelLimits: labelLimits, honorLabels: honorLabels, honorTimestamps: honorTimestamps, mrc: mrc, @@ -451,10 +472,15 @@ func (sp *scrapePool) Sync(tgs []*targetgroup.Group) { // It returns after all stopped scrape loops terminated. func (sp *scrapePool) sync(targets []*Target) { var ( - uniqueLoops = make(map[uint64]loop) - interval = time.Duration(sp.config.ScrapeInterval) - timeout = time.Duration(sp.config.ScrapeTimeout) - limit = int(sp.config.SampleLimit) + uniqueLoops = make(map[uint64]loop) + interval = time.Duration(sp.config.ScrapeInterval) + timeout = time.Duration(sp.config.ScrapeTimeout) + sampleLimit = int(sp.config.SampleLimit) + labelLimits = &labelLimits{ + labelLimit: int(sp.config.LabelLimit), + labelNameLengthLimit: int(sp.config.LabelNameLengthLimit), + labelValueLengthLimit: int(sp.config.LabelValueLengthLimit), + } honorLabels = sp.config.HonorLabels honorTimestamps = sp.config.HonorTimestamps mrc = sp.config.MetricRelabelConfigs @@ -469,7 +495,8 @@ func (sp *scrapePool) sync(targets []*Target) { l := sp.newLoop(scrapeLoopOptions{ target: t, scraper: s, - limit: limit, + sampleLimit: sampleLimit, + labelLimits: labelLimits, honorLabels: honorLabels, honorTimestamps: honorTimestamps, mrc: mrc, @@ 
-544,6 +571,41 @@ func (sp *scrapePool) refreshTargetLimitErr() error { return err } +func verifyLabelLimits(lset labels.Labels, limits *labelLimits) error { + if limits == nil { + return nil + } + + met := lset.Get(labels.MetricName) + if limits.labelLimit > 0 { + nbLabels := len(lset) + if nbLabels > int(limits.labelLimit) { + return fmt.Errorf("label_limit exceeded (metric: %.50s, number of label: %d, limit: %d)", met, nbLabels, limits.labelLimit) + } + } + + if limits.labelNameLengthLimit == 0 && limits.labelValueLengthLimit == 0 { + return nil + } + + for _, l := range lset { + if limits.labelNameLengthLimit > 0 { + nameLength := len(l.Name) + if nameLength > int(limits.labelNameLengthLimit) { + return fmt.Errorf("label_name_length_limit exceeded (metric: %.50s, label: %.50v, name length: %d, limit: %d)", met, l, nameLength, limits.labelNameLengthLimit) + } + } + + if limits.labelValueLengthLimit > 0 { + valueLength := len(l.Value) + if valueLength > int(limits.labelValueLengthLimit) { + return fmt.Errorf("label_value_length_limit exceeded (metric: %.50s, label: %.50v, value length: %d, limit: %d)", met, l, valueLength, limits.labelValueLengthLimit) + } + } + } + return nil +} + func mutateSampleLabels(lset labels.Labels, target *Target, honor bool, rc []*relabel.Config) labels.Labels { lb := labels.NewBuilder(lset) @@ -707,6 +769,7 @@ type scrapeLoop struct { honorTimestamps bool forcedErr error forcedErrMtx sync.Mutex + labelLimits *labelLimits appender func(ctx context.Context) storage.Appender sampleMutator labelsMutator @@ -974,6 +1037,7 @@ func newScrapeLoop(ctx context.Context, cache *scrapeCache, jitterSeed uint64, honorTimestamps bool, + labelLimits *labelLimits, ) *scrapeLoop { if l == nil { l = log.NewNopLogger() @@ -996,6 +1060,7 @@ func newScrapeLoop(ctx context.Context, l: l, parentCtx: ctx, honorTimestamps: honorTimestamps, + labelLimits: labelLimits, } sl.ctx, sl.cancel = context.WithCancel(ctx) @@ -1346,6 +1411,12 @@ loop: err = 
errNameLabelMandatory break loop } + + // If any label limit is exceeded, the scrape should fail. + if err = verifyLabelLimits(lset, sl.labelLimits); err != nil { + targetScrapePoolExceededLabelLimits.Inc() + break loop + } } ref, err = app.Append(ref, lset, t, v) @@ -1577,6 +1648,9 @@ func zeroConfig(c *config.ScrapeConfig) *config.ScrapeConfig { z.ScrapeInterval = 0 z.ScrapeTimeout = 0 z.SampleLimit = 0 + z.LabelLimit = 0 + z.LabelNameLengthLimit = 0 + z.LabelValueLengthLimit = 0 z.HTTPClientConfig = config_util.HTTPClientConfig{} return &z } diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index f21d10157c..93877035e4 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -464,8 +464,8 @@ func TestScrapePoolAppender(t *testing.T) { require.True(t, ok, "Expected base appender but got %T", tl.Appender) loop = sp.newLoop(scrapeLoopOptions{ - target: &Target{}, - limit: 100, + target: &Target{}, + sampleLimit: 100, }) appl, ok = loop.(*scrapeLoop) require.True(t, ok, "Expected scrapeLoop but got %T", loop) @@ -577,6 +577,7 @@ func TestScrapeLoopStopBeforeRun(t *testing.T) { nopMutator, nil, nil, 0, true, + nil, ) // The scrape pool synchronizes on stopping scrape loops. However, new scrape @@ -641,6 +642,7 @@ func TestScrapeLoopStop(t *testing.T) { nil, 0, true, + nil, ) // Terminate loop after 2 scrapes.
@@ -708,6 +710,7 @@ func TestScrapeLoopRun(t *testing.T) { nil, 0, true, + nil, ) // The loop must terminate during the initial offset if the context @@ -755,6 +758,7 @@ func TestScrapeLoopRun(t *testing.T) { nil, 0, true, + nil, ) go func() { @@ -806,6 +810,7 @@ func TestScrapeLoopForcedErr(t *testing.T) { nil, 0, true, + nil, ) forcedErr := fmt.Errorf("forced err") @@ -856,6 +861,7 @@ func TestScrapeLoopMetadata(t *testing.T) { cache, 0, true, + nil, ) defer cancel() @@ -905,6 +911,7 @@ func TestScrapeLoopSeriesAdded(t *testing.T) { nil, 0, true, + nil, ) defer cancel() @@ -943,6 +950,7 @@ func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrape(t *testing.T) { nil, 0, true, + nil, ) // Succeed once, several failures, then stop. numScrapes := 0 @@ -997,6 +1005,7 @@ func TestScrapeLoopRunCreatesStaleMarkersOnParseFailure(t *testing.T) { nil, 0, true, + nil, ) // Succeed once, several failures, then stop. @@ -1055,6 +1064,7 @@ func TestScrapeLoopCache(t *testing.T) { nil, 0, true, + nil, ) numScrapes := 0 @@ -1129,6 +1139,7 @@ func TestScrapeLoopCacheMemoryExhaustionProtection(t *testing.T) { nil, 0, true, + nil, ) numScrapes := 0 @@ -1235,6 +1246,7 @@ func TestScrapeLoopAppend(t *testing.T) { nil, 0, true, + nil, ) now := time.Now() @@ -1276,6 +1288,7 @@ func TestScrapeLoopAppendCacheEntryButErrNotFound(t *testing.T) { nil, 0, true, + nil, ) fakeRef := uint64(1) @@ -1325,6 +1338,7 @@ func TestScrapeLoopAppendSampleLimit(t *testing.T) { nil, 0, true, + nil, ) // Get the value of the Counter before performing the append. 
@@ -1394,6 +1408,7 @@ func TestScrapeLoop_ChangingMetricString(t *testing.T) { nil, 0, true, + nil, ) now := time.Now() @@ -1434,6 +1449,7 @@ func TestScrapeLoopAppendStaleness(t *testing.T) { nil, 0, true, + nil, ) now := time.Now() @@ -1477,6 +1493,7 @@ func TestScrapeLoopAppendNoStalenessIfTimestamp(t *testing.T) { nil, 0, true, + nil, ) now := time.Now() @@ -1578,6 +1595,7 @@ metric_total{n="2"} 2 # {t="2"} 2.0 20000 nil, 0, true, + nil, ) now := time.Now() @@ -1635,6 +1653,7 @@ func TestScrapeLoopAppendExemplarSeries(t *testing.T) { nil, 0, true, + nil, ) now := time.Now() @@ -1679,6 +1698,7 @@ func TestScrapeLoopRunReportsTargetDownOnScrapeError(t *testing.T) { nil, 0, true, + nil, ) scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { @@ -1707,6 +1727,7 @@ func TestScrapeLoopRunReportsTargetDownOnInvalidUTF8(t *testing.T) { nil, 0, true, + nil, ) scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { @@ -1748,6 +1769,7 @@ func TestScrapeLoopAppendGracefullyIfAmendOrOutOfOrderOrOutOfBounds(t *testing.T nil, 0, true, + nil, ) now := time.Unix(1, 0) @@ -1785,6 +1807,7 @@ func TestScrapeLoopOutOfBoundsTimeError(t *testing.T) { nil, 0, true, + nil, ) now := time.Now().Add(20 * time.Minute) @@ -1972,6 +1995,7 @@ func TestScrapeLoop_RespectTimestamps(t *testing.T) { func(ctx context.Context) storage.Appender { return capp }, nil, 0, true, + nil, ) now := time.Now() @@ -2005,6 +2029,7 @@ func TestScrapeLoop_DiscardTimestamps(t *testing.T) { func(ctx context.Context) storage.Appender { return capp }, nil, 0, false, + nil, ) now := time.Now() @@ -2037,6 +2062,7 @@ func TestScrapeLoopDiscardDuplicateLabels(t *testing.T) { nil, 0, true, + nil, ) defer cancel() @@ -2087,6 +2113,7 @@ func TestScrapeLoopDiscardUnnamedMetrics(t *testing.T) { nil, 0, true, + nil, ) defer cancel() @@ -2304,6 +2331,7 @@ func TestScrapeAddFast(t *testing.T) { nil, 0, true, + nil, ) defer cancel() @@ -2387,6 +2415,7 @@ func TestScrapeReportSingleAppender(t *testing.T) 
{ nil, 0, true, + nil, ) numScrapes := 0 @@ -2430,3 +2459,103 @@ func TestScrapeReportSingleAppender(t *testing.T) { t.Fatalf("Scrape wasn't stopped.") } } + +func TestScrapeLoopLabelLimit(t *testing.T) { + tests := []struct { + title string + scrapeLabels string + discoveryLabels []string + labelLimits labelLimits + expectErr bool + }{ + { + title: "Valid number of labels", + scrapeLabels: `metric{l1="1", l2="2"} 0`, + discoveryLabels: nil, + labelLimits: labelLimits{labelLimit: 5}, + expectErr: false, + }, { + title: "Too many labels", + scrapeLabels: `metric{l1="1", l2="2", l3="3", l4="4", l5="5", l6="6"} 0`, + discoveryLabels: nil, + labelLimits: labelLimits{labelLimit: 5}, + expectErr: true, + }, { + title: "Too many labels including discovery labels", + scrapeLabels: `metric{l1="1", l2="2", l3="3", l4="4"} 0`, + discoveryLabels: []string{"l5", "5", "l6", "6"}, + labelLimits: labelLimits{labelLimit: 5}, + expectErr: true, + }, { + title: "Valid labels name length", + scrapeLabels: `metric{l1="1", l2="2"} 0`, + discoveryLabels: nil, + labelLimits: labelLimits{labelNameLengthLimit: 10}, + expectErr: false, + }, { + title: "Label name too long", + scrapeLabels: `metric{label_name_too_long="0"} 0`, + discoveryLabels: nil, + labelLimits: labelLimits{labelNameLengthLimit: 10}, + expectErr: true, + }, { + title: "Discovery label name too long", + scrapeLabels: `metric{l1="1", l2="2"} 0`, + discoveryLabels: []string{"label_name_too_long", "0"}, + labelLimits: labelLimits{labelNameLengthLimit: 10}, + expectErr: true, + }, { + title: "Valid labels value length", + scrapeLabels: `metric{l1="1", l2="2"} 0`, + discoveryLabels: nil, + labelLimits: labelLimits{labelValueLengthLimit: 10}, + expectErr: false, + }, { + title: "Label value too long", + scrapeLabels: `metric{l1="label_value_too_long"} 0`, + discoveryLabels: nil, + labelLimits: labelLimits{labelValueLengthLimit: 10}, + expectErr: true, + }, { + title: "Discovery label value too long", + scrapeLabels: 
`metric{l1="1", l2="2"} 0`, + discoveryLabels: []string{"l1", "label_value_too_long"}, + labelLimits: labelLimits{labelValueLengthLimit: 10}, + expectErr: true, + }, + } + + for _, test := range tests { + app := &collectResultAppender{} + + discoveryLabels := &Target{ + labels: labels.FromStrings(test.discoveryLabels...), + } + + sl := newScrapeLoop(context.Background(), + nil, nil, nil, + func(l labels.Labels) labels.Labels { + return mutateSampleLabels(l, discoveryLabels, false, nil) + }, + func(l labels.Labels) labels.Labels { + return mutateReportSampleLabels(l, discoveryLabels) + }, + func(ctx context.Context) storage.Appender { return app }, + nil, + 0, + true, + &test.labelLimits, + ) + + slApp := sl.appender(context.Background()) + _, _, _, err := sl.append(slApp, []byte(test.scrapeLabels), "", time.Now()) + + t.Logf("Test:%s", test.title) + if test.expectErr { + require.Error(t, err) + } else { + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + } + } +}