Add labels.MapLabels() for setting mapped labels

This makes building labels faster by adding a fast string->index lookup path via a map.
We now need to populate both the slice that maps index->string and a map that gives us string->index,
so we add a labels.MapLabels() function which handles updating the static mapping.

Signed-off-by: Lukasz Mierzwa <l.mierzwa@gmail.com>
This commit is contained in:
Lukasz Mierzwa 2025-02-21 10:22:26 +00:00
parent 24dd39bc57
commit cf9d3a0db5
2 changed files with 121 additions and 89 deletions

View file

@ -101,9 +101,8 @@ func selectBlockStringsToMap(block *tsdb.Block) ([]string, error) {
}) })
mappedLabels := make([]string, 0, 256) mappedLabels := make([]string, 0, 256)
mappedLabels = append(mappedLabels, "") // We must always store empty string.
for i, c := range costs { for i, c := range costs {
if i > 254 { if i >= 256 {
break break
} }
mappedLabels = append(mappedLabels, c.name) mappedLabels = append(mappedLabels, c.name)
@ -132,6 +131,6 @@ func mapCommonLabelSymbols(db *tsdb.DB, logger *slog.Logger) error {
return err return err
} }
logger.Info("Mapped common label strings", slog.Int("count", len(mappedLabels))) logger.Info("Mapped common label strings", slog.Int("count", len(mappedLabels)))
labels.MappedLabels = mappedLabels labels.MapLabels(mappedLabels)
return nil return nil
} }

View file

@ -23,88 +23,122 @@ import (
"github.com/cespare/xxhash/v2" "github.com/cespare/xxhash/v2"
) )
// List of labels that should be mapped to a single byte value. var (
// Obviously can't have more than 256 here. // List of labels that should be mapped to a single byte value.
var MappedLabels = []string{ // Obviously can't have more than 256 here.
// Empty string, this must be present here. mappedLabels = []string{}
"", mappedLabelIndex = map[string]byte{}
// These label names are always present on every time series. )
MetricName,
InstanceName, // MapLabels takes a list of strings that should use a single byte storage
"job", // inside labels, making them use as little memory as possible.
// Common label names. // Since we use a single byte mapping we can only have 256 such strings.
BucketLabel, //
"code", // We MUST store empty string ("") as one of the values here and if you
"handler", // don't pass it into MapLabels() then it will be injected.
"quantile", //
// Meta metric names injected by Prometheus itself. // If you pass more strings than 256 then extra strings will be ignored.
"scrape_body_size_bytes", func MapLabels(names []string) {
"scrape_duration_seconds", // We must always store empty string. Push it to the front of the slice if not present.
"scrape_sample_limit", if !slices.Contains(names, "") {
"scrape_samples_post_metric_relabeling", names = append([]string{""}, names...)
"scrape_samples_scraped", }
"scrape_series_added",
"scrape_timeout_seconds", mappedLabels = make([]string, 0, 256)
// Common metric names from client libraries. mappedLabelIndex = make(map[string]byte, 256)
"process_cpu_seconds_total",
"process_max_fds", for i, name := range names {
"process_network_receive_bytes_total", if i >= 256 {
"process_network_transmit_bytes_total", break
"process_open_fds", }
"process_resident_memory_bytes", mappedLabels = append(mappedLabels, name)
"process_start_time_seconds ", mappedLabelIndex[name] = byte(i)
"process_virtual_memory_bytes", }
"process_virtual_memory_max_bytes", }
// client_go specific metrics
"go_gc_heap_frees_by_size_bytes_bucket", func init() {
"go_gc_heap_allocs_by_size_bytes_bucket", names := []string{
"net_conntrack_dialer_conn_failed_total", // Empty string, this must be present here.
"go_sched_pauses_total_other_seconds_bucket", "",
"go_sched_pauses_total_gc_seconds_bucket", // These label names are always present on every time series.
"go_sched_pauses_stopping_other_seconds_bucket", MetricName,
"go_sched_pauses_stopping_gc_seconds_bucket", InstanceName,
"go_sched_latencies_seconds_bucket", "job",
"go_gc_pauses_seconds_bucket", // Common label names.
"go_gc_duration_seconds", BucketLabel,
// node_exporter metrics "code",
"node_cpu_seconds_total", "handler",
"node_scrape_collector_success", "quantile",
"node_scrape_collector_duration_seconds", // Meta metric names injected by Prometheus itself.
"node_cpu_scaling_governor", "scrape_body_size_bytes",
"node_cpu_guest_seconds_total", "scrape_duration_seconds",
"node_hwmon_temp_celsius", "scrape_sample_limit",
"node_hwmon_sensor_label", "scrape_samples_post_metric_relabeling",
"node_hwmon_temp_max_celsius", "scrape_samples_scraped",
"node_cooling_device_max_state", "scrape_series_added",
"node_cooling_device_cur_state", "scrape_timeout_seconds",
"node_softnet_times_squeezed_total", // Common metric names from client libraries.
"node_softnet_received_rps_total", "process_cpu_seconds_total",
"node_softnet_processed_total", "process_max_fds",
"node_softnet_flow_limit_count_total", "process_network_receive_bytes_total",
"node_softnet_dropped_total", "process_network_transmit_bytes_total",
"node_softnet_cpu_collision_total", "process_open_fds",
"node_softnet_backlog_len", "process_resident_memory_bytes",
"node_schedstat_waiting_seconds_total", "process_start_time_seconds ",
"node_schedstat_timeslices_total", "process_virtual_memory_bytes",
"node_schedstat_running_seconds_total", "process_virtual_memory_max_bytes",
"node_cpu_scaling_frequency_min_hertz", // client_go specific metrics
"node_cpu_scaling_frequency_max_hertz", "go_gc_heap_frees_by_size_bytes_bucket",
"node_cpu_scaling_frequency_hertz", "go_gc_heap_allocs_by_size_bytes_bucket",
"node_cpu_frequency_min_hertz", "net_conntrack_dialer_conn_failed_total",
"node_cpu_frequency_max_hertz", "go_sched_pauses_total_other_seconds_bucket",
"node_hwmon_temp_crit_celsius", "go_sched_pauses_total_gc_seconds_bucket",
"node_hwmon_temp_crit_alarm_celsius", "go_sched_pauses_stopping_other_seconds_bucket",
"node_cpu_core_throttles_total", "go_sched_pauses_stopping_gc_seconds_bucket",
"node_thermal_zone_temp", "go_sched_latencies_seconds_bucket",
"node_hwmon_temp_min_celsius", "go_gc_pauses_seconds_bucket",
"node_hwmon_chip_names", "go_gc_duration_seconds",
"node_filesystem_readonly", // node_exporter metrics
"node_filesystem_device_error", "node_cpu_seconds_total",
"node_filesystem_size_bytes", "node_scrape_collector_success",
"node_filesystem_free_bytes", "node_scrape_collector_duration_seconds",
"node_filesystem_files_free", "node_cpu_scaling_governor",
"node_filesystem_files", "node_cpu_guest_seconds_total",
"node_filesystem_avail_bytes", "node_hwmon_temp_celsius",
"node_hwmon_sensor_label",
"node_hwmon_temp_max_celsius",
"node_cooling_device_max_state",
"node_cooling_device_cur_state",
"node_softnet_times_squeezed_total",
"node_softnet_received_rps_total",
"node_softnet_processed_total",
"node_softnet_flow_limit_count_total",
"node_softnet_dropped_total",
"node_softnet_cpu_collision_total",
"node_softnet_backlog_len",
"node_schedstat_waiting_seconds_total",
"node_schedstat_timeslices_total",
"node_schedstat_running_seconds_total",
"node_cpu_scaling_frequency_min_hertz",
"node_cpu_scaling_frequency_max_hertz",
"node_cpu_scaling_frequency_hertz",
"node_cpu_frequency_min_hertz",
"node_cpu_frequency_max_hertz",
"node_hwmon_temp_crit_celsius",
"node_hwmon_temp_crit_alarm_celsius",
"node_cpu_core_throttles_total",
"node_thermal_zone_temp",
"node_hwmon_temp_min_celsius",
"node_hwmon_chip_names",
"node_filesystem_readonly",
"node_filesystem_device_error",
"node_filesystem_size_bytes",
"node_filesystem_free_bytes",
"node_filesystem_files_free",
"node_filesystem_files",
"node_filesystem_avail_bytes",
}
MapLabels(names)
} }
// Labels is implemented by a single flat string holding name/value pairs. // Labels is implemented by a single flat string holding name/value pairs.
@ -144,15 +178,14 @@ func decodeString(data string, index int) (string, int) {
size, index, mapped = decodeSize(data, index) size, index, mapped = decodeSize(data, index)
if mapped { if mapped {
b := data[index] b := data[index]
return MappedLabels[int(b)], index + size return mappedLabels[int(b)], index + size
} }
return data[index : index+size], index + size return data[index : index+size], index + size
} }
func encodeShortString(s string) (int, byte) { func encodeShortString(s string) (int, byte) {
i := slices.Index(MappedLabels, s) if i, ok := mappedLabelIndex[s]; ok {
if i >= 0 { return 0, i
return 0, byte(i)
} }
return len(s), 0 return len(s), 0
} }