feat(utf8): utf8 content negotiation and flags

Signed-off-by: Owen Williams <owen.williams@grafana.com>
This commit is contained in:
Owen Williams 2024-07-18 14:08:21 -04:00
parent 5fd66ba855
commit 9e7308de38
8 changed files with 103 additions and 14 deletions

View file

@ -152,6 +152,7 @@ type flagConfig struct {
queryConcurrency int queryConcurrency int
queryMaxSamples int queryMaxSamples int
RemoteFlushDeadline model.Duration RemoteFlushDeadline model.Duration
nameEscapingScheme string
featureList []string featureList []string
memlimitRatio float64 memlimitRatio float64
@ -237,6 +238,9 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
case "delayed-compaction": case "delayed-compaction":
c.tsdb.EnableDelayedCompaction = true c.tsdb.EnableDelayedCompaction = true
level.Info(logger).Log("msg", "Experimental delayed compaction is enabled.") level.Info(logger).Log("msg", "Experimental delayed compaction is enabled.")
case "utf8-names":
model.NameValidationScheme = model.UTF8Validation
level.Info(logger).Log("msg", "Experimental UTF-8 support enabled")
case "": case "":
continue continue
case "promql-at-modifier", "promql-negative-offset": case "promql-at-modifier", "promql-negative-offset":
@ -481,7 +485,9 @@ func main() {
a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates."). a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates.").
Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval) Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval)
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details."). a.Flag("scrape.name-escaping-scheme", `Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots".`).Default(scrape.DefaultNameEscapingScheme.String()).StringVar(&cfg.nameEscapingScheme)
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
Default("").StringsVar(&cfg.featureList) Default("").StringsVar(&cfg.featureList)
promlogflag.AddFlags(a, &cfg.promlogConfig) promlogflag.AddFlags(a, &cfg.promlogConfig)
@ -509,6 +515,15 @@ func main() {
os.Exit(1) os.Exit(1)
} }
if cfg.nameEscapingScheme != "" {
scheme, err := model.ToEscapingScheme(cfg.nameEscapingScheme)
if err != nil {
fmt.Fprintf(os.Stderr, `Invalid name escaping scheme: %q; Needs to be one of "values", "underscores", or "dots"`, cfg.nameEscapingScheme)
os.Exit(1)
}
model.NameEscapingScheme = scheme
}
if agentMode && len(serverOnlyFlags) > 0 { if agentMode && len(serverOnlyFlags) > 0 {
fmt.Fprintf(os.Stderr, "The following flag(s) can not be used in agent mode: %q", serverOnlyFlags) fmt.Fprintf(os.Stderr, "The following flag(s) can not be used in agent mode: %q", serverOnlyFlags)
os.Exit(3) os.Exit(3)

View file

@ -67,6 +67,11 @@ var (
} }
) )
// Accepted values for the metric_name_validation_scheme configuration field.
const (
// LegacyValidationConfig selects legacy name validation (letters, numbers,
// colons, and underscores). An empty setting is treated the same way.
LegacyValidationConfig = "legacy"
// UTF8ValidationConfig selects UTF-8 name validation. It is only accepted
// when the utf8-names feature flag is enabled.
UTF8ValidationConfig = "utf8"
)
// Load parses the YAML input s into a Config. // Load parses the YAML input s into a Config.
func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, error) { func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, error) {
cfg := &Config{} cfg := &Config{}
@ -446,6 +451,8 @@ type GlobalConfig struct {
// Keep no more than this many dropped targets per job. // Keep no more than this many dropped targets per job.
// 0 means no limit. // 0 means no limit.
KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"` KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
// MetricNameValidationScheme selects how metric and label names are validated: "legacy" (or empty) or "utf8".
MetricNameValidationScheme string `yaml:"metric_name_validation_scheme,omitempty"`
} }
// ScrapeProtocol represents supported protocol for scraping metrics. // ScrapeProtocol represents supported protocol for scraping metrics.
@ -471,6 +478,7 @@ var (
PrometheusText0_0_4 ScrapeProtocol = "PrometheusText0.0.4" PrometheusText0_0_4 ScrapeProtocol = "PrometheusText0.0.4"
OpenMetricsText0_0_1 ScrapeProtocol = "OpenMetricsText0.0.1" OpenMetricsText0_0_1 ScrapeProtocol = "OpenMetricsText0.0.1"
OpenMetricsText1_0_0 ScrapeProtocol = "OpenMetricsText1.0.0" OpenMetricsText1_0_0 ScrapeProtocol = "OpenMetricsText1.0.0"
UTF8NamesHeader string = model.EscapingKey + "=" + model.AllowUTF8
ScrapeProtocolsHeaders = map[ScrapeProtocol]string{ ScrapeProtocolsHeaders = map[ScrapeProtocol]string{
PrometheusProto: "application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited", PrometheusProto: "application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited",
@ -656,6 +664,8 @@ type ScrapeConfig struct {
// Keep no more than this many dropped targets per job. // Keep no more than this many dropped targets per job.
// 0 means no limit. // 0 means no limit.
KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"` KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
// MetricNameValidationScheme selects how metric and label names are validated: "legacy" (or empty) or "utf8".
MetricNameValidationScheme string `yaml:"metric_name_validation_scheme,omitempty"`
// We cannot do proper Go type embedding below as the parser will then parse // We cannot do proper Go type embedding below as the parser will then parse
// values arbitrarily into the overflow maps of further-down types. // values arbitrarily into the overflow maps of further-down types.
@ -762,6 +772,17 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error {
return fmt.Errorf("%w for scrape config with job name %q", err, c.JobName) return fmt.Errorf("%w for scrape config with job name %q", err, c.JobName)
} }
switch globalConfig.MetricNameValidationScheme {
case "", LegacyValidationConfig:
case UTF8ValidationConfig:
if model.NameValidationScheme != model.UTF8Validation {
return fmt.Errorf("utf8 name validation requested but feature not enabled via --enable-feature=utf8-names")
}
default:
return fmt.Errorf("unknown name validation method specified, must be either 'legacy' or 'utf8', got %s", globalConfig.MetricNameValidationScheme)
}
c.MetricNameValidationScheme = globalConfig.MetricNameValidationScheme
return nil return nil
} }

View file

@ -56,7 +56,8 @@ The Prometheus monitoring server
| <code class="text-nowrap">--query.timeout</code> | Maximum time a query may take before being aborted. Use with server mode only. | `2m` | | <code class="text-nowrap">--query.timeout</code> | Maximum time a query may take before being aborted. Use with server mode only. | `2m` |
| <code class="text-nowrap">--query.max-concurrency</code> | Maximum number of queries executed concurrently. Use with server mode only. | `20` | | <code class="text-nowrap">--query.max-concurrency</code> | Maximum number of queries executed concurrently. Use with server mode only. | `20` |
| <code class="text-nowrap">--query.max-samples</code> | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` | | <code class="text-nowrap">--query.max-samples</code> | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` |
| <code class="text-nowrap">--enable-feature</code> | Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | | <code class="text-nowrap">--scrape.name-escaping-scheme</code> | Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots". | `values` |
| <code class="text-nowrap">--enable-feature</code> | Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | |
| <code class="text-nowrap">--log.level</code> | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` | | <code class="text-nowrap">--log.level</code> | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` |
| <code class="text-nowrap">--log.format</code> | Output format of log messages. One of: [logfmt, json] | `logfmt` | | <code class="text-nowrap">--log.format</code> | Output format of log messages. One of: [logfmt, json] | `logfmt` |

View file

@ -121,6 +121,11 @@ global:
# that will be kept in memory. 0 means no limit. # that will be kept in memory. 0 means no limit.
[ keep_dropped_targets: <int> | default = 0 ] [ keep_dropped_targets: <int> | default = 0 ]
# Specifies the validation scheme for metric and label names. Either blank or
# "legacy" for letters, numbers, colons, and underscores; or "utf8" for full
# UTF-8 support.
[ metric_name_validation_scheme: <string> | default = "legacy" ]
runtime: runtime:
# Configure the Go garbage collector GOGC parameter # Configure the Go garbage collector GOGC parameter
# See: https://tip.golang.org/doc/gc-guide#GOGC # See: https://tip.golang.org/doc/gc-guide#GOGC
@ -461,6 +466,11 @@ metric_relabel_configs:
# that will be kept in memory. 0 means no limit. # that will be kept in memory. 0 means no limit.
[ keep_dropped_targets: <int> | default = 0 ] [ keep_dropped_targets: <int> | default = 0 ]
# Specifies the validation scheme for metric and label names. Either blank or
# "legacy" for letters, numbers, colons, and underscores; or "utf8" for full
# UTF-8 support.
[ metric_name_validation_scheme: <string> | default = "legacy" ]
# Limit on total number of positive and negative buckets allowed in a single # Limit on total number of positive and negative buckets allowed in a single
# native histogram. The resolution of a histogram with more buckets will be # native histogram. The resolution of a histogram with more buckets will be
# reduced until the number of buckets is within the limit. If the limit cannot # reduced until the number of buckets is within the limit. If the limit cannot

View file

@ -249,3 +249,11 @@ In the event of multiple consecutive Head compactions being possible, only the f
Note that during this delay, the Head continues its usual operations, which include serving and appending series. Note that during this delay, the Head continues its usual operations, which include serving and appending series.
Despite the delay in compaction, the blocks produced are time-aligned in the same manner as they would be if the delay was not in place. Despite the delay in compaction, the blocks produced are time-aligned in the same manner as they would be if the delay was not in place.
## UTF-8 Name Support
`--enable-feature=utf8-names`
When enabled, changes the metric and label name validation scheme inside Prometheus to allow the full UTF-8 character set.
By itself, this flag does not cause Prometheus to request UTF-8 names via content negotiation.
Users must also set `metric_name_validation_scheme` to `utf8`, either in the global config or on a per-scrape config basis.

View file

@ -93,6 +93,8 @@ type Options struct {
skipOffsetting bool skipOffsetting bool
} }
// DefaultNameEscapingScheme is the escaping scheme used as the default value
// of the --scrape.name-escaping-scheme command-line flag.
const DefaultNameEscapingScheme = model.ValueEncodingEscaping
// Manager maintains a set of scrape pools and manages start/stop cycles // Manager maintains a set of scrape pools and manages start/stop cycles
// when receiving new target groups from the discovery manager. // when receiving new target groups from the discovery manager.
type Manager struct { type Manager struct {

View file

@ -303,6 +303,11 @@ func (sp *scrapePool) restartLoops(reuseCache bool) {
mrc = sp.config.MetricRelabelConfigs mrc = sp.config.MetricRelabelConfigs
) )
validationScheme := model.LegacyValidation
if sp.config.MetricNameValidationScheme == config.UTF8ValidationConfig {
validationScheme = model.UTF8Validation
}
sp.targetMtx.Lock() sp.targetMtx.Lock()
forcedErr := sp.refreshTargetLimitErr() forcedErr := sp.refreshTargetLimitErr()
@ -323,7 +328,7 @@ func (sp *scrapePool) restartLoops(reuseCache bool) {
client: sp.client, client: sp.client,
timeout: timeout, timeout: timeout,
bodySizeLimit: bodySizeLimit, bodySizeLimit: bodySizeLimit,
acceptHeader: acceptHeader(sp.config.ScrapeProtocols), acceptHeader: acceptHeader(sp.config.ScrapeProtocols, validationScheme),
acceptEncodingHeader: acceptEncodingHeader(enableCompression), acceptEncodingHeader: acceptEncodingHeader(enableCompression),
} }
newLoop = sp.newLoop(scrapeLoopOptions{ newLoop = sp.newLoop(scrapeLoopOptions{
@ -452,6 +457,11 @@ func (sp *scrapePool) sync(targets []*Target) {
scrapeClassicHistograms = sp.config.ScrapeClassicHistograms scrapeClassicHistograms = sp.config.ScrapeClassicHistograms
) )
validationScheme := model.LegacyValidation
if sp.config.MetricNameValidationScheme == config.UTF8ValidationConfig {
validationScheme = model.UTF8Validation
}
sp.targetMtx.Lock() sp.targetMtx.Lock()
for _, t := range targets { for _, t := range targets {
hash := t.hash() hash := t.hash()
@ -467,7 +477,7 @@ func (sp *scrapePool) sync(targets []*Target) {
client: sp.client, client: sp.client,
timeout: timeout, timeout: timeout,
bodySizeLimit: bodySizeLimit, bodySizeLimit: bodySizeLimit,
acceptHeader: acceptHeader(sp.config.ScrapeProtocols), acceptHeader: acceptHeader(sp.config.ScrapeProtocols, validationScheme),
acceptEncodingHeader: acceptEncodingHeader(enableCompression), acceptEncodingHeader: acceptEncodingHeader(enableCompression),
metrics: sp.metrics, metrics: sp.metrics,
} }
@ -714,11 +724,16 @@ var errBodySizeLimit = errors.New("body size limit exceeded")
// acceptHeader transforms preference from the options into specific header values as // acceptHeader transforms preference from the options into specific header values as
// https://www.rfc-editor.org/rfc/rfc9110.html#name-accept defines. // https://www.rfc-editor.org/rfc/rfc9110.html#name-accept defines.
// No validation is here, we expect scrape protocols to be validated already. // No validation is here, we expect scrape protocols to be validated already.
func acceptHeader(sps []config.ScrapeProtocol) string { func acceptHeader(sps []config.ScrapeProtocol, scheme model.ValidationScheme) string {
var vals []string var vals []string
weight := len(config.ScrapeProtocolsHeaders) + 1 weight := len(config.ScrapeProtocolsHeaders) + 1
for _, sp := range sps { for _, sp := range sps {
vals = append(vals, fmt.Sprintf("%s;q=0.%d", config.ScrapeProtocolsHeaders[sp], weight)) val := config.ScrapeProtocolsHeaders[sp]
if scheme == model.UTF8Validation {
val += ";" + config.UTF8NamesHeader
}
val += fmt.Sprintf(";q=0.%d", weight)
vals = append(vals, val)
weight-- weight--
} }
// Default match anything. // Default match anything.

View file

@ -2339,11 +2339,15 @@ func TestTargetScraperScrapeOK(t *testing.T) {
) )
var protobufParsing bool var protobufParsing bool
var allowUTF8 bool
server := httptest.NewServer( server := httptest.NewServer(
http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
accept := r.Header.Get("Accept")
if allowUTF8 {
require.Truef(t, strings.Contains(accept, "escaping=allow-utf-8"), "Expected Accept header to allow utf8, got %q", accept)
}
if protobufParsing { if protobufParsing {
accept := r.Header.Get("Accept")
require.True(t, strings.HasPrefix(accept, "application/vnd.google.protobuf;"), require.True(t, strings.HasPrefix(accept, "application/vnd.google.protobuf;"),
"Expected Accept header to prefer application/vnd.google.protobuf.") "Expected Accept header to prefer application/vnd.google.protobuf.")
} }
@ -2351,7 +2355,11 @@ func TestTargetScraperScrapeOK(t *testing.T) {
timeout := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds") timeout := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds")
require.Equal(t, expectedTimeout, timeout, "Expected scrape timeout header.") require.Equal(t, expectedTimeout, timeout, "Expected scrape timeout header.")
w.Header().Set("Content-Type", `text/plain; version=0.0.4`) if allowUTF8 {
w.Header().Set("Content-Type", `text/plain; version=1.0.0; escaping=allow-utf-8`)
} else {
w.Header().Set("Content-Type", `text/plain; version=0.0.4`)
}
w.Write([]byte("metric_a 1\nmetric_b 2\n")) w.Write([]byte("metric_a 1\nmetric_b 2\n"))
}), }),
) )
@ -2380,13 +2388,22 @@ func TestTargetScraperScrapeOK(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
contentType, err := ts.readResponse(context.Background(), resp, &buf) contentType, err := ts.readResponse(context.Background(), resp, &buf)
require.NoError(t, err) require.NoError(t, err)
require.Equal(t, "text/plain; version=0.0.4", contentType) if allowUTF8 {
require.Equal(t, "text/plain; version=1.0.0; escaping=allow-utf-8", contentType)
} else {
require.Equal(t, "text/plain; version=0.0.4", contentType)
}
require.Equal(t, "metric_a 1\nmetric_b 2\n", buf.String()) require.Equal(t, "metric_a 1\nmetric_b 2\n", buf.String())
} }
runTest(acceptHeader(config.DefaultScrapeProtocols)) runTest(acceptHeader(config.DefaultScrapeProtocols, model.LegacyValidation))
protobufParsing = true protobufParsing = true
runTest(acceptHeader(config.DefaultProtoFirstScrapeProtocols)) runTest(acceptHeader(config.DefaultProtoFirstScrapeProtocols, model.LegacyValidation))
protobufParsing = false
allowUTF8 = true
runTest(acceptHeader(config.DefaultScrapeProtocols, model.UTF8Validation))
protobufParsing = true
runTest(acceptHeader(config.DefaultProtoFirstScrapeProtocols, model.UTF8Validation))
} }
func TestTargetScrapeScrapeCancel(t *testing.T) { func TestTargetScrapeScrapeCancel(t *testing.T) {
@ -2412,7 +2429,7 @@ func TestTargetScrapeScrapeCancel(t *testing.T) {
), ),
}, },
client: http.DefaultClient, client: http.DefaultClient,
acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols), acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols, model.LegacyValidation),
} }
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
@ -2467,7 +2484,7 @@ func TestTargetScrapeScrapeNotFound(t *testing.T) {
), ),
}, },
client: http.DefaultClient, client: http.DefaultClient,
acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols), acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols, model.LegacyValidation),
} }
resp, err := ts.scrape(context.Background()) resp, err := ts.scrape(context.Background())
@ -2511,7 +2528,7 @@ func TestTargetScraperBodySizeLimit(t *testing.T) {
}, },
client: http.DefaultClient, client: http.DefaultClient,
bodySizeLimit: bodySizeLimit, bodySizeLimit: bodySizeLimit,
acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols), acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols, model.LegacyValidation),
metrics: newTestScrapeMetrics(t), metrics: newTestScrapeMetrics(t),
} }
var buf bytes.Buffer var buf bytes.Buffer