mirror of
https://github.com/prometheus/prometheus.git
synced 2025-03-05 20:59:13 -08:00
Merge pull request #13202 from krajorama/backport-13005-to-2.28
[2.48] Backport: Native histograms vs labels (#13005)
This commit is contained in:
commit
0bd4e739a0
|
@ -125,7 +125,61 @@ histogram (albeit via the text format). With this flag enabled, Prometheus will
|
|||
still ingest those conventional histograms that do not come with a
|
||||
corresponding native histogram. However, if a native histogram is present,
|
||||
Prometheus will ignore the corresponding conventional histogram, with the
|
||||
notable exception of exemplars, which are always ingested.
|
||||
notable exception of exemplars, which are always ingested. To keep the
|
||||
conventional histograms as well, enable `scrape_classic_histograms` in the
|
||||
scrape job.
|
||||
|
||||
_Note about the format of `le` and `quantile` label values:_
|
||||
|
||||
In certain situations, the protobuf parsing changes the number formatting of
|
||||
the `le` labels of conventional histograms and the `quantile` labels of
|
||||
summaries. Typically, this happens if the scraped target is instrumented with
|
||||
[client_golang](https://github.com/prometheus/client_golang) provided that
|
||||
[promhttp.HandlerOpts.EnableOpenMetrics](https://pkg.go.dev/github.com/prometheus/client_golang/prometheus/promhttp#HandlerOpts)
|
||||
is set to `false`. In such a case, integer label values are represented in the
|
||||
text format as such, e.g. `quantile="1"` or `le="2"`. However, the protobuf parsing
|
||||
changes the representation to float-like (following the OpenMetrics
|
||||
specification), so the examples above become `quantile="1.0"` and `le="2.0"` after
|
||||
ingestion into Prometheus, which changes the identity of the metric compared to
|
||||
what was ingested before via the text format.
|
||||
|
||||
The effect of this change is that alerts, recording rules and dashboards that
|
||||
directly reference label values as whole numbers such as `le="1"` will stop
|
||||
working.
|
||||
|
||||
Aggregation by the `le` and `quantile` labels for vectors that contain the old and
|
||||
new formatting will lead to unexpected results, and range vectors that span the
|
||||
transition between the different formatting will contain additional series.
|
||||
The most common use case for both is the quantile calculation via
|
||||
`histogram_quantile`, e.g.
|
||||
`histogram_quantile(0.95, sum by (le) (rate(histogram_bucket[10m])))`.
|
||||
The `histogram_quantile` function already tries to mitigate the effects to some
|
||||
extent, but there will be inaccuracies, in particular for shorter ranges that
|
||||
cover only a few samples.
|
||||
|
||||
Ways to deal with this change either globally or on a per-metric basis:
|
||||
|
||||
- Fix references to integer `le`, `quantile` label values, but otherwise do
|
||||
nothing and accept that some queries that span the transition time will produce
|
||||
inaccurate or unexpected results.
|
||||
_This is the recommended solution, to get consistently normalized label values._
|
||||
Also, Prometheus 3.0 is expected to enforce normalization of these label values.
|
||||
- Use `metric_relabel_config` to retain the old labels when scraping targets.
|
||||
This should **only** be applied to metrics that currently produce such labels.
|
||||
|
||||
<!-- The following config snippet is unit tested in scrape/scrape_test.go. -->
|
||||
```yaml
|
||||
metric_relabel_configs:
|
||||
- source_labels:
|
||||
- quantile
|
||||
target_label: quantile
|
||||
regex: (\d+)\.0+
|
||||
- source_labels:
|
||||
- le
|
||||
- __name__
|
||||
target_label: le
|
||||
regex: (\d+)\.0+;.*_bucket
|
||||
```
|
||||
|
||||
## OTLP Receiver
|
||||
|
||||
|
|
|
@ -3629,6 +3629,131 @@ func TestTargetScrapeIntervalAndTimeoutRelabel(t *testing.T) {
|
|||
require.Equal(t, "750ms", sp.ActiveTargets()[0].labels.Get(model.ScrapeTimeoutLabel))
|
||||
}
|
||||
|
||||
// Testing whether we can remove trailing .0 from histogram 'le' and summary 'quantile' labels.
|
||||
func TestLeQuantileReLabel(t *testing.T) {
|
||||
simpleStorage := teststorage.New(t)
|
||||
defer simpleStorage.Close()
|
||||
|
||||
config := &config.ScrapeConfig{
|
||||
JobName: "test",
|
||||
MetricRelabelConfigs: []*relabel.Config{
|
||||
{
|
||||
SourceLabels: model.LabelNames{"le", "__name__"},
|
||||
Regex: relabel.MustNewRegexp("(\\d+)\\.0+;.*_bucket"),
|
||||
Replacement: relabel.DefaultRelabelConfig.Replacement,
|
||||
Separator: relabel.DefaultRelabelConfig.Separator,
|
||||
TargetLabel: "le",
|
||||
Action: relabel.Replace,
|
||||
},
|
||||
{
|
||||
SourceLabels: model.LabelNames{"quantile"},
|
||||
Regex: relabel.MustNewRegexp("(\\d+)\\.0+"),
|
||||
Replacement: relabel.DefaultRelabelConfig.Replacement,
|
||||
Separator: relabel.DefaultRelabelConfig.Separator,
|
||||
TargetLabel: "quantile",
|
||||
Action: relabel.Replace,
|
||||
},
|
||||
},
|
||||
SampleLimit: 100,
|
||||
Scheme: "http",
|
||||
ScrapeInterval: model.Duration(100 * time.Millisecond),
|
||||
ScrapeTimeout: model.Duration(100 * time.Millisecond),
|
||||
}
|
||||
|
||||
metricsText := `
|
||||
# HELP test_histogram This is a histogram with default buckets
|
||||
# TYPE test_histogram histogram
|
||||
test_histogram_bucket{address="0.0.0.0",port="5001",le="0.005"} 0
|
||||
test_histogram_bucket{address="0.0.0.0",port="5001",le="0.01"} 0
|
||||
test_histogram_bucket{address="0.0.0.0",port="5001",le="0.025"} 0
|
||||
test_histogram_bucket{address="0.0.0.0",port="5001",le="0.05"} 0
|
||||
test_histogram_bucket{address="0.0.0.0",port="5001",le="0.1"} 0
|
||||
test_histogram_bucket{address="0.0.0.0",port="5001",le="0.25"} 0
|
||||
test_histogram_bucket{address="0.0.0.0",port="5001",le="0.5"} 0
|
||||
test_histogram_bucket{address="0.0.0.0",port="5001",le="1.0"} 0
|
||||
test_histogram_bucket{address="0.0.0.0",port="5001",le="2.5"} 0
|
||||
test_histogram_bucket{address="0.0.0.0",port="5001",le="5.0"} 0
|
||||
test_histogram_bucket{address="0.0.0.0",port="5001",le="10.0"} 0
|
||||
test_histogram_bucket{address="0.0.0.0",port="5001",le="+Inf"} 0
|
||||
test_histogram_sum{address="0.0.0.0",port="5001"} 0
|
||||
test_histogram_count{address="0.0.0.0",port="5001"} 0
|
||||
# HELP test_summary Number of inflight requests sampled at a regular interval. Quantile buckets keep track of inflight requests over the last 60s.
|
||||
# TYPE test_summary summary
|
||||
test_summary{quantile="0.5"} 0
|
||||
test_summary{quantile="0.9"} 0
|
||||
test_summary{quantile="0.95"} 0
|
||||
test_summary{quantile="0.99"} 0
|
||||
test_summary{quantile="1.0"} 1
|
||||
test_summary_sum 1
|
||||
test_summary_count 199
|
||||
`
|
||||
|
||||
// The expected "le" values do not have the trailing ".0".
|
||||
expectedLeValues := []string{"0.005", "0.01", "0.025", "0.05", "0.1", "0.25", "0.5", "1", "2.5", "5", "10", "+Inf"}
|
||||
|
||||
// The expected "quantile" values do not have the trailing ".0".
|
||||
expectedQuantileValues := []string{"0.5", "0.9", "0.95", "0.99", "1"}
|
||||
|
||||
scrapeCount := 0
|
||||
scraped := make(chan bool)
|
||||
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
fmt.Fprint(w, metricsText)
|
||||
scrapeCount++
|
||||
if scrapeCount > 2 {
|
||||
close(scraped)
|
||||
}
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
sp, err := newScrapePool(config, simpleStorage, 0, nil, &Options{})
|
||||
require.NoError(t, err)
|
||||
defer sp.stop()
|
||||
|
||||
testURL, err := url.Parse(ts.URL)
|
||||
require.NoError(t, err)
|
||||
sp.Sync([]*targetgroup.Group{
|
||||
{
|
||||
Targets: []model.LabelSet{{model.AddressLabel: model.LabelValue(testURL.Host)}},
|
||||
},
|
||||
})
|
||||
require.Equal(t, 1, len(sp.ActiveTargets()))
|
||||
|
||||
select {
|
||||
case <-time.After(5 * time.Second):
|
||||
t.Fatalf("target was not scraped")
|
||||
case <-scraped:
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
q, err := simpleStorage.Querier(time.Time{}.UnixNano(), time.Now().UnixNano())
|
||||
require.NoError(t, err)
|
||||
defer q.Close()
|
||||
|
||||
checkValues := func(labelName string, expectedValues []string, series storage.SeriesSet) {
|
||||
foundLeValues := map[string]bool{}
|
||||
|
||||
for series.Next() {
|
||||
s := series.At()
|
||||
v := s.Labels().Get(labelName)
|
||||
require.NotContains(t, foundLeValues, v, "duplicate label value found")
|
||||
foundLeValues[v] = true
|
||||
}
|
||||
|
||||
require.Equal(t, len(expectedValues), len(foundLeValues), "number of label values not as expected")
|
||||
for _, v := range expectedValues {
|
||||
require.Contains(t, foundLeValues, v, "label value not found")
|
||||
}
|
||||
}
|
||||
|
||||
series := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "test_histogram_bucket"))
|
||||
checkValues("le", expectedLeValues, series)
|
||||
|
||||
series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "test_summary"))
|
||||
checkValues("quantile", expectedQuantileValues, series)
|
||||
}
|
||||
|
||||
func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t *testing.T) {
|
||||
appender := &collectResultAppender{}
|
||||
var (
|
||||
|
|
Loading…
Reference in a new issue