feat(utf8): utf8 content negotiation and flags

Signed-off-by: Owen Williams <owen.williams@grafana.com>
This commit is contained in:
Owen Williams 2024-07-18 14:08:21 -04:00
parent 5fd66ba855
commit 9e7308de38
8 changed files with 103 additions and 14 deletions

View file

@ -152,6 +152,7 @@ type flagConfig struct {
queryConcurrency int
queryMaxSamples int
RemoteFlushDeadline model.Duration
nameEscapingScheme string
featureList []string
memlimitRatio float64
@ -237,6 +238,9 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
case "delayed-compaction":
c.tsdb.EnableDelayedCompaction = true
level.Info(logger).Log("msg", "Experimental delayed compaction is enabled.")
case "utf8-names":
model.NameValidationScheme = model.UTF8Validation
level.Info(logger).Log("msg", "Experimental UTF-8 support enabled")
case "":
continue
case "promql-at-modifier", "promql-negative-offset":
@ -481,7 +485,9 @@ func main() {
a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates.").
Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval)
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
a.Flag("scrape.name-escaping-scheme", `Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots".`).Default(scrape.DefaultNameEscapingScheme.String()).StringVar(&cfg.nameEscapingScheme)
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
Default("").StringsVar(&cfg.featureList)
promlogflag.AddFlags(a, &cfg.promlogConfig)
@ -509,6 +515,15 @@ func main() {
os.Exit(1)
}
if cfg.nameEscapingScheme != "" {
scheme, err := model.ToEscapingScheme(cfg.nameEscapingScheme)
if err != nil {
fmt.Fprintf(os.Stderr, `Invalid name escaping scheme: %q; Needs to be one of "values", "underscores", or "dots"`, cfg.nameEscapingScheme)
os.Exit(1)
}
model.NameEscapingScheme = scheme
}
if agentMode && len(serverOnlyFlags) > 0 {
fmt.Fprintf(os.Stderr, "The following flag(s) can not be used in agent mode: %q", serverOnlyFlags)
os.Exit(3)

View file

@ -67,6 +67,11 @@ var (
}
)
// Accepted values for the metric_name_validation_scheme configuration setting.
const (
// LegacyValidationConfig is the config value that selects legacy name validation.
LegacyValidationConfig = "legacy"
// UTF8ValidationConfig is the config value that selects UTF-8 name validation.
UTF8ValidationConfig = "utf8"
)
// Load parses the YAML input s into a Config.
func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, error) {
cfg := &Config{}
@ -446,6 +451,8 @@ type GlobalConfig struct {
// Keep no more than this many dropped targets per job.
// 0 means no limit.
KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
// Allow UTF8 Metric and Label Names.
MetricNameValidationScheme string `yaml:"metric_name_validation_scheme,omitempty"`
}
// ScrapeProtocol represents supported protocol for scraping metrics.
@ -471,6 +478,7 @@ var (
PrometheusText0_0_4 ScrapeProtocol = "PrometheusText0.0.4"
OpenMetricsText0_0_1 ScrapeProtocol = "OpenMetricsText0.0.1"
OpenMetricsText1_0_0 ScrapeProtocol = "OpenMetricsText1.0.0"
UTF8NamesHeader string = model.EscapingKey + "=" + model.AllowUTF8
ScrapeProtocolsHeaders = map[ScrapeProtocol]string{
PrometheusProto: "application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited",
@ -656,6 +664,8 @@ type ScrapeConfig struct {
// Keep no more than this many dropped targets per job.
// 0 means no limit.
KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
// Allow UTF8 Metric and Label Names.
MetricNameValidationScheme string `yaml:"metric_name_validation_scheme,omitempty"`
// We cannot do proper Go type embedding below as the parser will then parse
// values arbitrarily into the overflow maps of further-down types.
@ -762,6 +772,17 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error {
return fmt.Errorf("%w for scrape config with job name %q", err, c.JobName)
}
switch globalConfig.MetricNameValidationScheme {
case "", LegacyValidationConfig:
case UTF8ValidationConfig:
if model.NameValidationScheme != model.UTF8Validation {
return fmt.Errorf("utf8 name validation requested but feature not enabled via --enable-feature=utf8-names")
}
default:
return fmt.Errorf("unknown name validation method specified, must be either 'legacy' or 'utf8', got %s", globalConfig.MetricNameValidationScheme)
}
c.MetricNameValidationScheme = globalConfig.MetricNameValidationScheme
return nil
}

View file

@ -56,7 +56,8 @@ The Prometheus monitoring server
| <code class="text-nowrap">--query.timeout</code> | Maximum time a query may take before being aborted. Use with server mode only. | `2m` |
| <code class="text-nowrap">--query.max-concurrency</code> | Maximum number of queries executed concurrently. Use with server mode only. | `20` |
| <code class="text-nowrap">--query.max-samples</code> | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` |
| <code class="text-nowrap">--enable-feature</code> | Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | |
| <code class="text-nowrap">--scrape.name-escaping-scheme</code> | Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots". | `values` |
| <code class="text-nowrap">--enable-feature</code> | Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | |
| <code class="text-nowrap">--log.level</code> | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` |
| <code class="text-nowrap">--log.format</code> | Output format of log messages. One of: [logfmt, json] | `logfmt` |

View file

@ -121,6 +121,11 @@ global:
# that will be kept in memory. 0 means no limit.
[ keep_dropped_targets: <int> | default = 0 ]
# Specifies the validation scheme for metric and label names. Either blank or
# "legacy" for letters, numbers, colons, and underscores; or "utf8" for full
# UTF-8 support.
[ metric_name_validation_scheme: <string> | default = "legacy" ]
runtime:
# Configure the Go garbage collector GOGC parameter
# See: https://tip.golang.org/doc/gc-guide#GOGC
@ -461,6 +466,11 @@ metric_relabel_configs:
# that will be kept in memory. 0 means no limit.
[ keep_dropped_targets: <int> | default = 0 ]
# Specifies the validation scheme for metric and label names. Either blank or
# "legacy" for letters, numbers, colons, and underscores; or "utf8" for full
# UTF-8 support.
[ metric_name_validation_scheme: <string> | default = "legacy" ]
# Limit on total number of positive and negative buckets allowed in a single
# native histogram. The resolution of a histogram with more buckets will be
# reduced until the number of buckets is within the limit. If the limit cannot

View file

@ -249,3 +249,11 @@ In the event of multiple consecutive Head compactions being possible, only the f
Note that during this delay, the Head continues its usual operations, which include serving and appending series.
Despite the delay in compaction, the blocks produced are time-aligned in the same manner as they would be if the delay was not in place.
## UTF-8 Name Support
`--enable-feature=utf8-names`
When enabled, changes the metric and label name validation scheme inside Prometheus to allow the full UTF-8 character set.
By itself, this flag does not cause Prometheus to request UTF-8 names via content negotiation.
Users must also set `metric_name_validation_scheme`, either in the global config or on a per-scrape config basis, to enable the feature.

View file

@ -93,6 +93,8 @@ type Options struct {
skipOffsetting bool
}
// DefaultNameEscapingScheme is the escaping scheme used as the default value
// of the --scrape.name-escaping-scheme command-line flag.
const DefaultNameEscapingScheme = model.ValueEncodingEscaping
// Manager maintains a set of scrape pools and manages start/stop cycles
// when receiving new target groups from the discovery manager.
type Manager struct {

View file

@ -303,6 +303,11 @@ func (sp *scrapePool) restartLoops(reuseCache bool) {
mrc = sp.config.MetricRelabelConfigs
)
validationScheme := model.LegacyValidation
if sp.config.MetricNameValidationScheme == config.UTF8ValidationConfig {
validationScheme = model.UTF8Validation
}
sp.targetMtx.Lock()
forcedErr := sp.refreshTargetLimitErr()
@ -323,7 +328,7 @@ func (sp *scrapePool) restartLoops(reuseCache bool) {
client: sp.client,
timeout: timeout,
bodySizeLimit: bodySizeLimit,
acceptHeader: acceptHeader(sp.config.ScrapeProtocols),
acceptHeader: acceptHeader(sp.config.ScrapeProtocols, validationScheme),
acceptEncodingHeader: acceptEncodingHeader(enableCompression),
}
newLoop = sp.newLoop(scrapeLoopOptions{
@ -452,6 +457,11 @@ func (sp *scrapePool) sync(targets []*Target) {
scrapeClassicHistograms = sp.config.ScrapeClassicHistograms
)
validationScheme := model.LegacyValidation
if sp.config.MetricNameValidationScheme == config.UTF8ValidationConfig {
validationScheme = model.UTF8Validation
}
sp.targetMtx.Lock()
for _, t := range targets {
hash := t.hash()
@ -467,7 +477,7 @@ func (sp *scrapePool) sync(targets []*Target) {
client: sp.client,
timeout: timeout,
bodySizeLimit: bodySizeLimit,
acceptHeader: acceptHeader(sp.config.ScrapeProtocols),
acceptHeader: acceptHeader(sp.config.ScrapeProtocols, validationScheme),
acceptEncodingHeader: acceptEncodingHeader(enableCompression),
metrics: sp.metrics,
}
@ -714,11 +724,16 @@ var errBodySizeLimit = errors.New("body size limit exceeded")
// acceptHeader transforms preference from the options into specific header values as
// https://www.rfc-editor.org/rfc/rfc9110.html#name-accept defines.
// No validation is here, we expect scrape protocols to be validated already.
func acceptHeader(sps []config.ScrapeProtocol) string {
func acceptHeader(sps []config.ScrapeProtocol, scheme model.ValidationScheme) string {
var vals []string
weight := len(config.ScrapeProtocolsHeaders) + 1
for _, sp := range sps {
vals = append(vals, fmt.Sprintf("%s;q=0.%d", config.ScrapeProtocolsHeaders[sp], weight))
val := config.ScrapeProtocolsHeaders[sp]
if scheme == model.UTF8Validation {
val += ";" + config.UTF8NamesHeader
}
val += fmt.Sprintf(";q=0.%d", weight)
vals = append(vals, val)
weight--
}
// Default match anything.

View file

@ -2339,11 +2339,15 @@ func TestTargetScraperScrapeOK(t *testing.T) {
)
var protobufParsing bool
var allowUTF8 bool
server := httptest.NewServer(
http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
accept := r.Header.Get("Accept")
if allowUTF8 {
require.Truef(t, strings.Contains(accept, "escaping=allow-utf-8"), "Expected Accept header to allow utf8, got %q", accept)
}
if protobufParsing {
accept := r.Header.Get("Accept")
require.True(t, strings.HasPrefix(accept, "application/vnd.google.protobuf;"),
"Expected Accept header to prefer application/vnd.google.protobuf.")
}
@ -2351,7 +2355,11 @@ func TestTargetScraperScrapeOK(t *testing.T) {
timeout := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds")
require.Equal(t, expectedTimeout, timeout, "Expected scrape timeout header.")
w.Header().Set("Content-Type", `text/plain; version=0.0.4`)
if allowUTF8 {
w.Header().Set("Content-Type", `text/plain; version=1.0.0; escaping=allow-utf-8`)
} else {
w.Header().Set("Content-Type", `text/plain; version=0.0.4`)
}
w.Write([]byte("metric_a 1\nmetric_b 2\n"))
}),
)
@ -2380,13 +2388,22 @@ func TestTargetScraperScrapeOK(t *testing.T) {
require.NoError(t, err)
contentType, err := ts.readResponse(context.Background(), resp, &buf)
require.NoError(t, err)
require.Equal(t, "text/plain; version=0.0.4", contentType)
if allowUTF8 {
require.Equal(t, "text/plain; version=1.0.0; escaping=allow-utf-8", contentType)
} else {
require.Equal(t, "text/plain; version=0.0.4", contentType)
}
require.Equal(t, "metric_a 1\nmetric_b 2\n", buf.String())
}
runTest(acceptHeader(config.DefaultScrapeProtocols))
runTest(acceptHeader(config.DefaultScrapeProtocols, model.LegacyValidation))
protobufParsing = true
runTest(acceptHeader(config.DefaultProtoFirstScrapeProtocols))
runTest(acceptHeader(config.DefaultProtoFirstScrapeProtocols, model.LegacyValidation))
protobufParsing = false
allowUTF8 = true
runTest(acceptHeader(config.DefaultScrapeProtocols, model.UTF8Validation))
protobufParsing = true
runTest(acceptHeader(config.DefaultProtoFirstScrapeProtocols, model.UTF8Validation))
}
func TestTargetScrapeScrapeCancel(t *testing.T) {
@ -2412,7 +2429,7 @@ func TestTargetScrapeScrapeCancel(t *testing.T) {
),
},
client: http.DefaultClient,
acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols),
acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols, model.LegacyValidation),
}
ctx, cancel := context.WithCancel(context.Background())
@ -2467,7 +2484,7 @@ func TestTargetScrapeScrapeNotFound(t *testing.T) {
),
},
client: http.DefaultClient,
acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols),
acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols, model.LegacyValidation),
}
resp, err := ts.scrape(context.Background())
@ -2511,7 +2528,7 @@ func TestTargetScraperBodySizeLimit(t *testing.T) {
},
client: http.DefaultClient,
bodySizeLimit: bodySizeLimit,
acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols),
acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols, model.LegacyValidation),
metrics: newTestScrapeMetrics(t),
}
var buf bytes.Buffer