fix(utf8): ensure correct validation when legacy mode turned on (#14736)

fix(utf8): ensure correct validation when legacy mode turned on

This depends on the included update of the prometheus/common dependency.

---------

Signed-off-by: Owen Williams <owen.williams@grafana.com>
This commit is contained in:
Owen Williams 2024-08-28 11:15:42 -04:00 committed by GitHub
parent 1d6e0071b7
commit 9da75328ea
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 146 additions and 15 deletions

4
go.mod
View file

@ -52,9 +52,9 @@ require (
github.com/oklog/ulid v1.3.1
github.com/ovh/go-ovh v1.6.0
github.com/prometheus/alertmanager v0.27.0
github.com/prometheus/client_golang v1.19.1
github.com/prometheus/client_golang v1.20.0
github.com/prometheus/client_model v0.6.1
github.com/prometheus/common v0.55.0
github.com/prometheus/common v0.56.0
github.com/prometheus/common/assets v0.2.0
github.com/prometheus/common/sigv4 v0.1.0
github.com/prometheus/exporter-toolkit v0.11.0

8
go.sum
View file

@ -608,8 +608,8 @@ github.com/prometheus/client_golang v1.3.0/go.mod h1:hJaj2vgQTGQmVCsAACORcieXFeD
github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=
github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M=
github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0=
github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE=
github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho=
github.com/prometheus/client_golang v1.20.0 h1:jBzTZ7B099Rg24tny+qngoynol8LtVYlA2bqx3vEloI=
github.com/prometheus/client_golang v1.20.0/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
@ -625,8 +625,8 @@ github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8b
github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc=
github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls=
github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8=
github.com/prometheus/common v0.56.0 h1:UffReloqkBtvtQEYDg2s+uDPGRrJyC6vZWPGXf6OhPY=
github.com/prometheus/common v0.56.0/go.mod h1:7uRPFSUTbfZWsJ7MHY56sqt7hLQu3bxXHDnNhl8E9qI=
github.com/prometheus/common/assets v0.2.0 h1:0P5OrzoHrYBOSM1OigWL3mY8ZvV2N4zIE/5AahrSrfM=
github.com/prometheus/common/assets v0.2.0/go.mod h1:D17UVUE12bHbim7HzwUvtqm6gwBEaDQ0F+hIGbFbccI=
github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4=

View file

@ -95,12 +95,23 @@ func (ls *Labels) UnmarshalYAML(unmarshal func(interface{}) error) error {
}
// IsValid checks if the metric name or label names are valid.
func (ls Labels) IsValid() bool {
func (ls Labels) IsValid(validationScheme model.ValidationScheme) bool {
err := ls.Validate(func(l Label) error {
if l.Name == model.MetricNameLabel && !model.IsValidMetricName(model.LabelValue(l.Value)) {
return strconv.ErrSyntax
if l.Name == model.MetricNameLabel {
// If the default validation scheme has been overridden with legacy mode,
// we need to call the special legacy validation checker.
if validationScheme == model.LegacyValidation && model.NameValidationScheme == model.UTF8Validation && !model.IsValidLegacyMetricName(string(model.LabelValue(l.Value))) {
return strconv.ErrSyntax
}
if !model.IsValidMetricName(model.LabelValue(l.Value)) {
return strconv.ErrSyntax
}
}
if !model.LabelName(l.Name).IsValid() || !model.LabelValue(l.Value).IsValid() {
if validationScheme == model.LegacyValidation && model.NameValidationScheme == model.UTF8Validation {
if !model.LabelName(l.Name).IsValidLegacy() || !model.LabelValue(l.Value).IsValid() {
return strconv.ErrSyntax
}
} else if !model.LabelName(l.Name).IsValid() || !model.LabelValue(l.Value).IsValid() {
return strconv.ErrSyntax
}
return nil

View file

@ -21,6 +21,7 @@ import (
"strings"
"testing"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
)
@ -272,11 +273,86 @@ func TestLabels_IsValid(t *testing.T) {
},
} {
t.Run("", func(t *testing.T) {
require.Equal(t, test.expected, test.input.IsValid())
require.Equal(t, test.expected, test.input.IsValid(model.LegacyValidation))
})
}
}
func TestLabels_ValidationModes(t *testing.T) {
for _, test := range []struct {
input Labels
globalMode model.ValidationScheme
callMode model.ValidationScheme
expected bool
}{
{
input: FromStrings(
"__name__", "test.metric",
"hostname", "localhost",
"job", "check",
),
globalMode: model.UTF8Validation,
callMode: model.UTF8Validation,
expected: true,
},
{
input: FromStrings(
"__name__", "test",
"\xc5 bad utf8", "localhost",
"job", "check",
),
globalMode: model.UTF8Validation,
callMode: model.UTF8Validation,
expected: false,
},
{
// Setting the common model to legacy validation and then trying to check for UTF-8 on a
// per-call basis is not supported.
input: FromStrings(
"__name__", "test.utf8.metric",
"hostname", "localhost",
"job", "check",
),
globalMode: model.LegacyValidation,
callMode: model.UTF8Validation,
expected: false,
},
{
input: FromStrings(
"__name__", "test",
"hostname", "localhost",
"job", "check",
),
globalMode: model.LegacyValidation,
callMode: model.LegacyValidation,
expected: true,
},
{
input: FromStrings(
"__name__", "test.utf8.metric",
"hostname", "localhost",
"job", "check",
),
globalMode: model.UTF8Validation,
callMode: model.LegacyValidation,
expected: false,
},
{
input: FromStrings(
"__name__", "test",
"host.name", "localhost",
"job", "check",
),
globalMode: model.UTF8Validation,
callMode: model.LegacyValidation,
expected: false,
},
} {
model.NameValidationScheme = test.globalMode
require.Equal(t, test.expected, test.input.IsValid(test.callMode))
}
}
func TestLabels_Equal(t *testing.T) {
labels := FromStrings(
"aaa", "111",

View file

@ -88,7 +88,7 @@ func (node *AggregateExpr) getAggOpStr() string {
func joinLabels(ss []string) string {
for i, s := range ss {
// If the label is already quoted, don't quote it again.
if s[0] != '"' && s[0] != '\'' && s[0] != '`' && !model.IsValidLegacyMetricName(model.LabelValue(s)) {
if s[0] != '"' && s[0] != '\'' && s[0] != '`' && !model.IsValidLegacyMetricName(string(model.LabelValue(s))) {
ss[i] = fmt.Sprintf("\"%s\"", s)
}
}

View file

@ -111,6 +111,7 @@ type scrapeLoopOptions struct {
interval time.Duration
timeout time.Duration
scrapeClassicHistograms bool
validationScheme model.ValidationScheme
mrc []*relabel.Config
cache *scrapeCache
@ -186,6 +187,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed
options.PassMetadataInContext,
metrics,
options.skipOffsetting,
opts.validationScheme,
)
}
sp.metrics.targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit))
@ -346,6 +348,7 @@ func (sp *scrapePool) restartLoops(reuseCache bool) {
cache: cache,
interval: interval,
timeout: timeout,
validationScheme: validationScheme,
})
)
if err != nil {
@ -853,6 +856,7 @@ type scrapeLoop struct {
interval time.Duration
timeout time.Duration
scrapeClassicHistograms bool
validationScheme model.ValidationScheme
// Feature flagged options.
enableNativeHistogramIngestion bool
@ -1160,6 +1164,7 @@ func newScrapeLoop(ctx context.Context,
passMetadataInContext bool,
metrics *scrapeMetrics,
skipOffsetting bool,
validationScheme model.ValidationScheme,
) *scrapeLoop {
if l == nil {
l = log.NewNopLogger()
@ -1211,6 +1216,7 @@ func newScrapeLoop(ctx context.Context,
appendMetadataToWAL: appendMetadataToWAL,
metrics: metrics,
skipOffsetting: skipOffsetting,
validationScheme: validationScheme,
}
sl.ctx, sl.cancel = context.WithCancel(ctx)
@ -1631,7 +1637,7 @@ loop:
err = errNameLabelMandatory
break loop
}
if !lset.IsValid() {
if !lset.IsValid(sl.validationScheme) {
err = fmt.Errorf("invalid metric name or label names: %s", lset.String())
break loop
}

View file

@ -684,6 +684,7 @@ func newBasicScrapeLoop(t testing.TB, ctx context.Context, scraper scraper, app
false,
newTestScrapeMetrics(t),
false,
model.LegacyValidation,
)
}
@ -826,6 +827,7 @@ func TestScrapeLoopRun(t *testing.T) {
false,
scrapeMetrics,
false,
model.LegacyValidation,
)
// The loop must terminate during the initial offset if the context
@ -970,6 +972,7 @@ func TestScrapeLoopMetadata(t *testing.T) {
false,
scrapeMetrics,
false,
model.LegacyValidation,
)
defer cancel()
@ -1065,6 +1068,40 @@ func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) {
require.Equal(t, 0, seriesAdded)
}
// TestScrapeLoopFailLegacyUnderUTF8 checks that, with the global name
// validation scheme set to UTF-8, a scrape loop configured for legacy
// validation rejects a sample whose metric name is only valid under UTF-8, and
// that the same sample is accepted once the loop's scheme is UTF-8.
func TestScrapeLoopFailLegacyUnderUTF8(t *testing.T) {
	// Test that scrapes fail when default validation is utf8 but scrape config is
	// legacy.
	// Remember the scheme in effect on entry and restore exactly that value,
	// rather than assuming the process default is legacy validation.
	origScheme := model.NameValidationScheme
	model.NameValidationScheme = model.UTF8Validation
	defer func() {
		model.NameValidationScheme = origScheme
	}()

	s := teststorage.New(t)
	defer s.Close()
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	sl := newBasicScrapeLoop(t, ctx, &testScraper{}, s.Appender, 0)
	sl.validationScheme = model.LegacyValidation

	slApp := sl.appender(ctx)
	total, added, seriesAdded, err := sl.append(slApp, []byte("{\"test.metric\"} 1\n"), "", time.Time{})
	require.ErrorContains(t, err, "invalid metric name or label names")
	require.NoError(t, slApp.Rollback())
	require.Equal(t, 1, total)
	require.Equal(t, 0, added)
	require.Equal(t, 0, seriesAdded)

	// When scrapeloop has validation set to UTF-8, the metric is allowed.
	sl.validationScheme = model.UTF8Validation

	slApp = sl.appender(ctx)
	total, added, seriesAdded, err = sl.append(slApp, []byte("{\"test.metric\"} 1\n"), "", time.Time{})
	require.NoError(t, err)
	require.Equal(t, 1, total)
	require.Equal(t, 1, added)
	require.Equal(t, 1, seriesAdded)
}
func makeTestMetrics(n int) []byte {
// Construct a metrics string to parse
sb := bytes.Buffer{}

View file

@ -28,6 +28,7 @@ import (
"github.com/golang/snappy"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/model/exemplar"
@ -239,7 +240,7 @@ func (h *writeHandler) write(ctx context.Context, req *prompb.WriteRequest) (err
// TODO(bwplotka): Even as per 1.0 spec, this should be a 400 error, while other samples are
// potentially written. Perhaps unify with fixed writeV2 implementation a bit.
if !ls.Has(labels.MetricName) || !ls.IsValid() {
if !ls.Has(labels.MetricName) || !ls.IsValid(model.NameValidationScheme) {
level.Warn(h.logger).Log("msg", "Invalid metric names or labels", "got", ls.String())
samplesWithInvalidLabels++
continue
@ -380,7 +381,7 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs *
// Validate series labels early.
// NOTE(bwplotka): While spec allows UTF-8, Prometheus Receiver may impose
// specific limits and follow https://prometheus.io/docs/specs/remote_write_spec_2_0/#invalid-samples case.
if !ls.Has(labels.MetricName) || !ls.IsValid() {
if !ls.Has(labels.MetricName) || !ls.IsValid(model.NameValidationScheme) {
badRequestErrs = append(badRequestErrs, fmt.Errorf("invalid metric name or labels, got %v", ls.String()))
samplesWithInvalidLabels += len(ts.Samples) + len(ts.Histograms)
continue