Merge pull request #14706 from aknuds1/arve/otlp-invalid-histograms
Some checks are pending
buf.build / lint and publish (push) Waiting to run
CI / Go tests (push) Waiting to run
CI / More Go tests (push) Waiting to run
CI / Go tests with previous Go version (push) Waiting to run
CI / UI tests (push) Waiting to run
CI / Go tests on Windows (push) Waiting to run
CI / Mixins tests (push) Waiting to run
CI / Build Prometheus for common architectures (0) (push) Waiting to run
CI / Build Prometheus for common architectures (1) (push) Waiting to run
CI / Build Prometheus for common architectures (2) (push) Waiting to run
CI / Build Prometheus for all architectures (0) (push) Waiting to run
CI / Build Prometheus for all architectures (1) (push) Waiting to run
CI / Build Prometheus for all architectures (10) (push) Waiting to run
CI / Build Prometheus for all architectures (11) (push) Waiting to run
CI / Build Prometheus for all architectures (2) (push) Waiting to run
CI / Build Prometheus for all architectures (3) (push) Waiting to run
CI / Build Prometheus for all architectures (4) (push) Waiting to run
CI / Build Prometheus for all architectures (5) (push) Waiting to run
CI / Build Prometheus for all architectures (6) (push) Waiting to run
CI / Build Prometheus for all architectures (7) (push) Waiting to run
CI / Build Prometheus for all architectures (8) (push) Waiting to run
CI / Build Prometheus for all architectures (9) (push) Waiting to run
CI / Report status of build Prometheus for all architectures (push) Blocked by required conditions
CI / Check generated parser (push) Waiting to run
CI / golangci-lint (push) Waiting to run
CI / fuzzing (push) Waiting to run
CI / codeql (push) Waiting to run
CI / Publish main branch artifacts (push) Blocked by required conditions
CI / Publish release artefacts (push) Blocked by required conditions
CI / Publish UI on npm Registry (push) Blocked by required conditions
Scorecards supply-chain security / Scorecards analysis (push) Waiting to run

OTLP receiver: Warn when encountering invalid exponential histograms
This commit is contained in:
Arve Knudsen 2024-08-21 18:07:45 +02:00 committed by GitHub
commit 7cfe0b1567
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 67 additions and 14 deletions

View file

@ -3,6 +3,7 @@
## unreleased
* [FEATURE] OTLP receiver: Add new option `otlp.promote_resource_attributes`, for any OTel resource attributes that should be promoted to metric labels. #14200
* [ENHANCEMENT] OTLP receiver: Warn when encountering exponential histograms with zero count and non-zero sum. #14706
* [BUGFIX] tsdb/wlog.Watcher.readSegmentForGC: Only count unknown record types against record_decode_failures_total metric. #14042
## 2.54.0-rc.1 / 2024-08-05

View file

@ -26,6 +26,7 @@ import (
"github.com/prometheus/prometheus/model/value"
"github.com/prometheus/prometheus/prompb"
"github.com/prometheus/prometheus/util/annotations"
)
const defaultZeroThreshold = 1e-128
@ -33,13 +34,15 @@ const defaultZeroThreshold = 1e-128
// addExponentialHistogramDataPoints adds OTel exponential histogram data points to the corresponding time series
// as native histogram samples.
func (c *PrometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetric.ExponentialHistogramDataPointSlice,
resource pcommon.Resource, settings Settings, promName string) error {
resource pcommon.Resource, settings Settings, promName string) (annotations.Annotations, error) {
var annots annotations.Annotations
for x := 0; x < dataPoints.Len(); x++ {
pt := dataPoints.At(x)
histogram, err := exponentialToNativeHistogram(pt)
histogram, ws, err := exponentialToNativeHistogram(pt)
annots.Merge(ws)
if err != nil {
return err
return annots, err
}
lbls := createAttributes(
@ -58,15 +61,16 @@ func (c *PrometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetr
ts.Exemplars = append(ts.Exemplars, exemplars...)
}
return nil
return annots, nil
}
// exponentialToNativeHistogram translates OTel Exponential Histogram data point
// to Prometheus Native Histogram.
func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prompb.Histogram, error) {
func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prompb.Histogram, annotations.Annotations, error) {
var annots annotations.Annotations
scale := p.Scale()
if scale < -4 {
return prompb.Histogram{},
return prompb.Histogram{}, annots,
fmt.Errorf("cannot convert exponential to native histogram."+
" Scale must be >= -4, was %d", scale)
}
@ -114,8 +118,11 @@ func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prom
h.Sum = p.Sum()
}
h.Count = &prompb.Histogram_CountInt{CountInt: p.Count()}
if p.Count() == 0 && h.Sum != 0 {
annots.Add(fmt.Errorf("exponential histogram data point has zero count, but non-zero sum: %f", h.Sum))
}
}
return h, nil
return h, annots, nil
}
// convertBucketsLayout translates OTel Exponential Histogram dense buckets

View file

@ -27,6 +27,7 @@ import (
"github.com/prometheus/prometheus/prompb"
prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus"
"github.com/prometheus/prometheus/util/annotations"
)
type Settings struct {
@ -53,7 +54,7 @@ func NewPrometheusConverter() *PrometheusConverter {
}
// FromMetrics converts pmetric.Metrics to Prometheus remote write format.
func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) (errs error) {
func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) (annots annotations.Annotations, errs error) {
resourceMetricsSlice := md.ResourceMetrics()
for i := 0; i < resourceMetricsSlice.Len(); i++ {
resourceMetrics := resourceMetricsSlice.At(i)
@ -107,12 +108,14 @@ func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings)
errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name()))
break
}
errs = multierr.Append(errs, c.addExponentialHistogramDataPoints(
ws, err := c.addExponentialHistogramDataPoints(
dataPoints,
resource,
settings,
promName,
))
)
annots.Merge(ws)
errs = multierr.Append(errs, err)
case pmetric.MetricTypeSummary:
dataPoints := metric.Summary().DataPoints()
if dataPoints.Len() == 0 {
@ -128,7 +131,7 @@ func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings)
addResourceTargetInfo(resource, settings, mostRecentTimestamp, c)
}
return
return annots, errs
}
func isSameMetric(ts *prompb.TimeSeries, lbls []prompb.Label) bool {

View file

@ -27,6 +27,41 @@ import (
"go.opentelemetry.io/collector/pdata/pmetric/pmetricotlp"
)
func TestFromMetrics(t *testing.T) {
t.Run("exponential histogram warnings for zero count and non-zero sum", func(t *testing.T) {
request := pmetricotlp.NewExportRequest()
rm := request.Metrics().ResourceMetrics().AppendEmpty()
generateAttributes(rm.Resource().Attributes(), "resource", 10)
metrics := rm.ScopeMetrics().AppendEmpty().Metrics()
ts := pcommon.NewTimestampFromTime(time.Now())
for i := 1; i <= 10; i++ {
m := metrics.AppendEmpty()
m.SetEmptyExponentialHistogram()
m.SetName(fmt.Sprintf("histogram-%d", i))
m.ExponentialHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative)
h := m.ExponentialHistogram().DataPoints().AppendEmpty()
h.SetTimestamp(ts)
h.SetCount(0)
h.SetSum(155)
generateAttributes(h.Attributes(), "series", 10)
}
converter := NewPrometheusConverter()
annots, err := converter.FromMetrics(request.Metrics(), Settings{})
require.NoError(t, err)
require.NotEmpty(t, annots)
ws, infos := annots.AsStrings("", 0, 0)
require.Empty(t, infos)
require.Equal(t, []string{
"exponential histogram data point has zero count, but non-zero sum: 155.000000",
}, ws)
})
}
func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) {
for _, resourceAttributeCount := range []int{0, 5, 50} {
b.Run(fmt.Sprintf("resource attribute count: %v", resourceAttributeCount), func(b *testing.B) {
@ -49,7 +84,9 @@ func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) {
for i := 0; i < b.N; i++ {
converter := NewPrometheusConverter()
require.NoError(b, converter.FromMetrics(payload.Metrics(), Settings{}))
annots, err := converter.FromMetrics(payload.Metrics(), Settings{})
require.NoError(b, err)
require.Empty(b, annots)
require.NotNil(b, converter.TimeSeries())
}
})

View file

@ -502,12 +502,17 @@ func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
otlpCfg := h.configFunc().OTLPConfig
converter := otlptranslator.NewPrometheusConverter()
if err := converter.FromMetrics(req.Metrics(), otlptranslator.Settings{
annots, err := converter.FromMetrics(req.Metrics(), otlptranslator.Settings{
AddMetricSuffixes: true,
PromoteResourceAttributes: otlpCfg.PromoteResourceAttributes,
}); err != nil {
})
if err != nil {
level.Warn(h.logger).Log("msg", "Error translating OTLP metrics to Prometheus write request", "err", err)
}
ws, _ := annots.AsStrings("", 0, 0)
if len(ws) > 0 {
level.Warn(h.logger).Log("msg", "Warnings translating OTLP metrics to Prometheus write request", "warnings", ws)
}
err = h.rwHandler.write(r.Context(), &prompb.WriteRequest{
Timeseries: converter.TimeSeries(),