From 1fa34984fab9727cbc71b3f1e891de0dfc2f0907 Mon Sep 17 00:00:00 2001 From: Owen Williams Date: Tue, 11 Feb 2025 16:09:06 -0500 Subject: [PATCH] otlp: Create experimental no-translation mode Add an option to not add suffixes. This creates the possibility of collisions when metrics have the same name but different type or unit, but this enables better testing of the OTLP flow Signed-off-by: Owen Williams --- config/config.go | 11 ++- config/config_test.go | 20 +++++ config/testdata/otlp_no_translation.good.yml | 2 + storage/remote/write_handler.go | 4 +- storage/remote/write_handler_test.go | 13 ++++ storage/remote/write_test.go | 79 ++++++++++++++++++-- 6 files changed, 120 insertions(+), 9 deletions(-) create mode 100644 config/testdata/otlp_no_translation.good.yml diff --git a/config/config.go b/config/config.go index 73282ac429..a4ae001d1b 100644 --- a/config/config.go +++ b/config/config.go @@ -110,7 +110,7 @@ func Load(s string, logger *slog.Logger) (*Config, error) { switch cfg.OTLPConfig.TranslationStrategy { case UnderscoreEscapingWithSuffixes: case "": - case NoUTF8EscapingWithSuffixes: + case NoTranslation, NoUTF8EscapingWithSuffixes: if cfg.GlobalConfig.MetricNameValidationScheme == LegacyValidationConfig { return nil, errors.New("OTLP translation strategy NoUTF8EscapingWithSuffixes is not allowed when UTF8 is disabled") } @@ -1435,6 +1435,15 @@ var ( // and label name characters that are not alphanumerics/underscores to underscores. // Unit and type suffixes may be appended to metric names, according to certain rules. UnderscoreEscapingWithSuffixes translationStrategyOption = "UnderscoreEscapingWithSuffixes" + // NoTranslation (EXPERIMENTAL): disables all translation of incoming metric + // and label names. Note that because metrics in Open Telemetry are considered + // distinct if they share the same name but have different Type or Units, for + // instance "foo.bar" with units Seconds is a separate series from "foo.bar" + // with units Milliseconds. Because prometheus does not yet support type and + // unit metadata, these two series would be conflated in Prometheus. + // Therefore this setting is experimental and should not be used in + // production systems. + NoTranslation translationStrategyOption = "NoTranslation" ) // OTLPConfig is the configuration for writing to the OTLP endpoint. diff --git a/config/config_test.go b/config/config_test.go index 437b858b00..cad8f4f68e 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -1590,6 +1590,26 @@ func TestOTLPAllowUTF8(t *testing.T) { }) }) + t.Run("good config, no translation", func(t *testing.T) { + fpath := filepath.Join("testdata", "otlp_no_translation.good.yml") + verify := func(t *testing.T, conf *Config, err error) { + t.Helper() + require.NoError(t, err) + require.Equal(t, NoTranslation, conf.OTLPConfig.TranslationStrategy) + } + + t.Run("LoadFile", func(t *testing.T) { + conf, err := LoadFile(fpath, false, promslog.NewNopLogger()) + verify(t, conf, err) + }) + t.Run("Load", func(t *testing.T) { + content, err := os.ReadFile(fpath) + require.NoError(t, err) + conf, err := Load(string(content), promslog.NewNopLogger()) + verify(t, conf, err) + }) + }) + t.Run("incompatible config", func(t *testing.T) { fpath := filepath.Join("testdata", "otlp_allow_utf8.incompatible.yml") verify := func(t *testing.T, err error) { diff --git a/config/testdata/otlp_no_translation.good.yml b/config/testdata/otlp_no_translation.good.yml new file mode 100644 index 0000000000..e5c4460842 --- /dev/null +++ b/config/testdata/otlp_no_translation.good.yml @@ -0,0 +1,2 @@ +otlp: + translation_strategy: NoTranslation diff --git a/storage/remote/write_handler.go b/storage/remote/write_handler.go index 02585539c0..9147206673 100644 --- a/storage/remote/write_handler.go +++ b/storage/remote/write_handler.go @@ -577,8 +577,8 @@ func (rw *rwExporter) ConsumeMetrics(ctx context.Context, md pmetric.Metrics) er converter := otlptranslator.NewPrometheusConverter() annots, err := converter.FromMetrics(ctx, md, otlptranslator.Settings{ - AddMetricSuffixes: true, - AllowUTF8: otlpCfg.TranslationStrategy == config.NoUTF8EscapingWithSuffixes, + AddMetricSuffixes: otlpCfg.TranslationStrategy != config.NoTranslation, + AllowUTF8: otlpCfg.TranslationStrategy != config.UnderscoreEscapingWithSuffixes, PromoteResourceAttributes: otlpCfg.PromoteResourceAttributes, KeepIdentifyingResourceAttributes: otlpCfg.KeepIdentifyingResourceAttributes, }) diff --git a/storage/remote/write_handler_test.go b/storage/remote/write_handler_test.go index b37b3632b9..6a486b2394 100644 --- a/storage/remote/write_handler_test.go +++ b/storage/remote/write_handler_test.go @@ -844,6 +844,19 @@ func requireEqual(t *testing.T, expected, actual interface{}, msgAndArgs ...inte msgAndArgs...) } +func requireContainsSample(t *testing.T, actual []mockSample, expected mockSample) { + t.Helper() + + for _, got := range actual { + if labels.Equal(expected.l, got.l) && expected.t == got.t && expected.v == got.v { + return + } + } + require.Fail(t, fmt.Sprintf("Sample not found: \n"+ + "expected: %v\n"+ + "actual : %v", expected, actual)) +} + func (m *mockAppendable) Appender(_ context.Context) storage.Appender { if m.latestSample == nil { m.latestSample = map[uint64]int64{} diff --git a/storage/remote/write_test.go b/storage/remote/write_test.go index 8125da7f6e..f137e33d01 100644 --- a/storage/remote/write_test.go +++ b/storage/remote/write_test.go @@ -382,7 +382,44 @@ func TestWriteStorageApplyConfig_PartialUpdate(t *testing.T) { func TestOTLPWriteHandler(t *testing.T) { exportRequest := generateOTLPWriteRequest() + resp, appendable := handleOtlp(t, exportRequest) + require.Equal(t, http.StatusOK, resp.StatusCode) + require.Len(t, appendable.samples, 12) // 1 (counter) + 1 (gauge) + 1 (target_info) + 7 (hist_bucket) + 2 (hist_sum, hist_count) + require.Len(t, appendable.histograms, 1) // 1 (exponential histogram) + require.Len(t, appendable.exemplars, 1) // 1 (exemplar) +} + +func TestOTLPWriteHandlerNoTranslation(t *testing.T) { + timestamp := time.Now() + exportRequest := generateCounterOTLPWriteRequest(timestamp) + resp, appendable := handleOtlp(t, exportRequest) + require.Equal(t, http.StatusOK, resp.StatusCode) + + requireContainsSample(t, appendable.samples, mockSample{ + l: labels.New( + labels.Label{Name: "__name__", Value: "test.counter"}, + labels.Label{Name: "foo.bar", Value: "baz"}, + labels.Label{Name: "instance", Value: "test-instance"}, + labels.Label{Name: "job", Value: "test-service"}, + ), + t: timestamp.UnixMilli(), + v: 10, + }) + + requireContainsSample(t, appendable.samples, mockSample{ + l: labels.New( + labels.Label{Name: "__name__", Value: "target_info"}, + labels.Label{Name: "host.name", Value: "test-host"}, + labels.Label{Name: "instance", Value: "test-instance"}, + labels.Label{Name: "job", Value: "test-service"}, + ), + t: timestamp.UnixMilli(), + v: 1, + }) +} + +func handleOtlp(t *testing.T, exportRequest pmetricotlp.ExportRequest) (*http.Response, *mockAppendable) { buf, err := exportRequest.MarshalProto() require.NoError(t, err) @@ -391,9 +428,11 @@ func TestOTLPWriteHandler(t *testing.T) { req.Header.Set("Content-Type", "application/x-protobuf") appendable := &mockAppendable{} + conf := config.DefaultOTLPConfig + conf.TranslationStrategy = config.NoTranslation handler := NewOTLPWriteHandler(nil, nil, appendable, func() config.Config { return config.Config{ - OTLPConfig: config.DefaultOTLPConfig, + OTLPConfig: conf, } }, OTLPOptions{}) @@ -401,11 +440,39 @@ func TestOTLPWriteHandler(t *testing.T) { handler.ServeHTTP(recorder, req) resp := recorder.Result() - require.Equal(t, http.StatusOK, resp.StatusCode) + return resp, appendable +} - require.Len(t, appendable.samples, 12) // 1 (counter) + 1 (gauge) + 1 (target_info) + 7 (hist_bucket) + 2 (hist_sum, hist_count) - require.Len(t, appendable.histograms, 1) // 1 (exponential histogram) - require.Len(t, appendable.exemplars, 1) // 1 (exemplar) +func generateCounterOTLPWriteRequest(timestamp time.Time) pmetricotlp.ExportRequest { + d := pmetric.NewMetrics() + + resourceMetric := d.ResourceMetrics().AppendEmpty() + resourceMetric.Resource().Attributes().PutStr("service.name", "test-service") + resourceMetric.Resource().Attributes().PutStr("service.instance.id", "test-instance") + resourceMetric.Resource().Attributes().PutStr("host.name", "test-host") + + scopeMetric := resourceMetric.ScopeMetrics().AppendEmpty() + + counterMetric := scopeMetric.Metrics().AppendEmpty() + counterMetric.SetName("test.counter") + counterMetric.SetDescription("test-counter-description") + counterMetric.SetEmptySum() + counterMetric.Sum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + counterMetric.Sum().SetIsMonotonic(true) + + counterDataPoint := counterMetric.Sum().DataPoints().AppendEmpty() + counterDataPoint.SetTimestamp(pcommon.NewTimestampFromTime(timestamp)) + counterDataPoint.SetDoubleValue(10.0) + counterDataPoint.Attributes().PutStr("foo.bar", "baz") + + counterExemplar := counterDataPoint.Exemplars().AppendEmpty() + + counterExemplar.SetTimestamp(pcommon.NewTimestampFromTime(timestamp)) + counterExemplar.SetDoubleValue(10.0) + counterExemplar.SetSpanID(pcommon.SpanID{0, 1, 2, 3, 4, 5, 6, 7}) + counterExemplar.SetTraceID(pcommon.TraceID{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}) + + return pmetricotlp.NewExportRequestFromMetrics(d) } func generateOTLPWriteRequest() pmetricotlp.ExportRequest { @@ -426,7 +493,7 @@ func generateOTLPWriteRequest() pmetricotlp.ExportRequest { // Generate One Counter counterMetric := scopeMetric.Metrics().AppendEmpty() - counterMetric.SetName("test-counter") + counterMetric.SetName("test.counter") counterMetric.SetDescription("test-counter-description") counterMetric.SetEmptySum() counterMetric.Sum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative)