From 565c6fa7049bf59f9dbc06b69a58a8286f98148a Mon Sep 17 00:00:00 2001 From: Lukasz Mierzwa Date: Thu, 6 Feb 2025 15:37:32 +0000 Subject: [PATCH 1/7] Reduce stringlabels memory usage for common labels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stringlabels stores all time series labels as a single string using this format: [length]name[length]value ... So a label set for my_metric{job="foo", instance="bar", env="prod", blank=""} would be encoded as: [8]__name__[9]my_metric[3]job[3]foo[8]instance[3]bar[3]env[4]prod[5]blank[0] This is a huge improvement over the 'classic' labels implementation that stores all label names & values as separate strings. There is some room for improvement though since some strings are present more often than others. For example __name__ will be present for all label sets of every time series we store in HEAD, eating 1+8=9 bytes. Since __name__ is a well-known string we can try to use a single byte to store it in our encoded string, rather than repeat it in full each time. To be able to store strings that are shortened to a single byte we need to somehow signal that to the reader of the encoded string; for that we use the fact that zero-length strings are rare and generally not stored on time series. If we have an encoded string with zero length then this will now signal that it represents a mapped value - to learn the true value of this string we need to read the next byte which gives us the index in a static mapping. That mapping must include the empty string, so that we can still encode empty strings using this scheme. Example of our mapping (minimal version): 0: "" 1: "__name__" 2: "instance" 3: "job" With that mapping our example label set would be encoded as: [0]1[9]my_metric[0]3[3]foo[0]2[3]bar[3]env[4]prod[5]blank[0]0 The tricky bit is how to populate this mapping with useful strings that will result in measurable memory savings. 
This is further complicated by the fact that the mapping must remain static and cannot be modified during the lifetime of the Prometheus process. We can fill all the 255 slots we have inside our mapping byte with well-known generic strings and that will provide some measurable savings for all Prometheus users, and is essentially a slightly more compact stringlabels variant. We could also allow users to pass in a list of well-known strings via flags, which will allow Prometheus operators to reduce memory usage for any labels if they know those are popular. A third option is to discover the most popular strings from TSDB or WAL on startup, but that's more complicated and we might pick a list that would be the best set of mapped strings on startup, but after some time is no longer the best set. Benchmark results: goos: linux goarch: amd64 pkg: github.com/prometheus/prometheus/model/labels cpu: 13th Gen Intel(R) Core(TM) i7-13800H │ main.txt │ new1.txt │ │ sec/op │ sec/op vs base │ String-20 863.8n ± 4% 873.0n ± 4% ~ (p=0.353 n=10) Labels_Get/with_5_labels/first_label/get-20 4.763n ± 1% 5.035n ± 0% +5.72% (p=0.000 n=10) Labels_Get/with_5_labels/first_label/has-20 3.439n ± 0% 3.967n ± 0% +15.37% (p=0.000 n=10) Labels_Get/with_5_labels/middle_label/get-20 7.077n ± 1% 9.588n ± 1% +35.47% (p=0.000 n=10) Labels_Get/with_5_labels/middle_label/has-20 5.166n ± 0% 6.990n ± 1% +35.30% (p=0.000 n=10) Labels_Get/with_5_labels/last_label/get-20 9.181n ± 1% 12.970n ± 1% +41.26% (p=0.000 n=10) Labels_Get/with_5_labels/last_label/has-20 8.101n ± 1% 11.640n ± 1% +43.69% (p=0.000 n=10) Labels_Get/with_5_labels/not-found_label/get-20 3.974n ± 0% 4.768n ± 0% +19.98% (p=0.000 n=10) Labels_Get/with_5_labels/not-found_label/has-20 3.974n ± 0% 5.033n ± 0% +26.65% (p=0.000 n=10) Labels_Get/with_10_labels/first_label/get-20 4.761n ± 0% 5.042n ± 0% +5.90% (p=0.000 n=10) Labels_Get/with_10_labels/first_label/has-20 3.442n ± 0% 3.972n ± 0% +15.40% (p=0.000 n=10) Labels_Get/with_10_labels/middle_label/get-20 10.62n ± 1% 
14.85n ± 1% +39.83% (p=0.000 n=10) Labels_Get/with_10_labels/middle_label/has-20 9.360n ± 1% 13.375n ± 0% +42.90% (p=0.000 n=10) Labels_Get/with_10_labels/last_label/get-20 18.19n ± 1% 22.00n ± 0% +20.97% (p=0.000 n=10) Labels_Get/with_10_labels/last_label/has-20 16.51n ± 0% 20.50n ± 1% +24.14% (p=0.000 n=10) Labels_Get/with_10_labels/not-found_label/get-20 3.985n ± 0% 4.768n ± 0% +19.62% (p=0.000 n=10) Labels_Get/with_10_labels/not-found_label/has-20 3.973n ± 0% 5.045n ± 0% +26.97% (p=0.000 n=10) Labels_Get/with_30_labels/first_label/get-20 4.773n ± 0% 5.050n ± 1% +5.80% (p=0.000 n=10) Labels_Get/with_30_labels/first_label/has-20 3.443n ± 1% 3.976n ± 2% +15.50% (p=0.000 n=10) Labels_Get/with_30_labels/middle_label/get-20 31.93n ± 0% 43.50n ± 1% +36.21% (p=0.000 n=10) Labels_Get/with_30_labels/middle_label/has-20 30.53n ± 0% 41.75n ± 1% +36.75% (p=0.000 n=10) Labels_Get/with_30_labels/last_label/get-20 106.55n ± 0% 71.17n ± 0% -33.21% (p=0.000 n=10) Labels_Get/with_30_labels/last_label/has-20 104.70n ± 0% 69.21n ± 1% -33.90% (p=0.000 n=10) Labels_Get/with_30_labels/not-found_label/get-20 3.976n ± 1% 4.772n ± 0% +20.03% (p=0.000 n=10) Labels_Get/with_30_labels/not-found_label/has-20 3.974n ± 0% 5.032n ± 0% +26.64% (p=0.000 n=10) Labels_Equals/equal-20 2.382n ± 0% 2.446n ± 0% +2.67% (p=0.000 n=10) Labels_Equals/not_equal-20 0.2741n ± 2% 0.2662n ± 2% -2.88% (p=0.001 n=10) Labels_Equals/different_sizes-20 0.2762n ± 3% 0.2652n ± 0% -3.95% (p=0.000 n=10) Labels_Equals/lots-20 2.381n ± 0% 2.386n ± 1% +0.23% (p=0.011 n=10) Labels_Equals/real_long_equal-20 6.087n ± 1% 5.558n ± 1% -8.70% (p=0.000 n=10) Labels_Equals/real_long_different_end-20 5.030n ± 0% 4.699n ± 0% -6.57% (p=0.000 n=10) Labels_Compare/equal-20 4.814n ± 1% 4.777n ± 0% -0.77% (p=0.000 n=10) Labels_Compare/not_equal-20 17.55n ± 8% 20.92n ± 1% +19.24% (p=0.000 n=10) Labels_Compare/different_sizes-20 3.711n ± 1% 3.707n ± 0% ~ (p=0.224 n=10) Labels_Compare/lots-20 27.09n ± 3% 28.73n ± 2% +6.05% (p=0.000 n=10) 
Labels_Compare/real_long_equal-20 27.91n ± 3% 15.67n ± 1% -43.86% (p=0.000 n=10) Labels_Compare/real_long_different_end-20 33.92n ± 1% 35.35n ± 1% +4.22% (p=0.000 n=10) Labels_Hash/typical_labels_under_1KB-20 59.63n ± 0% 59.67n ± 0% ~ (p=0.897 n=10) Labels_Hash/bigger_labels_over_1KB-20 73.42n ± 1% 73.81n ± 1% ~ (p=0.342 n=10) Labels_Hash/extremely_large_label_value_10MB-20 720.3µ ± 2% 715.2µ ± 3% ~ (p=0.971 n=10) Builder-20 371.6n ± 4% 1191.0n ± 3% +220.46% (p=0.000 n=10) Labels_Copy-20 85.52n ± 4% 53.90n ± 48% -36.97% (p=0.000 n=10) geomean 13.26n 14.68n +10.71% │ main.txt │ new1.txt │ │ B/op │ B/op vs base │ String-20 240.0 ± 0% 240.0 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_5_labels/first_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_5_labels/first_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_5_labels/middle_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_5_labels/middle_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_5_labels/last_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_5_labels/last_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_5_labels/not-found_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_5_labels/not-found_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_10_labels/first_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_10_labels/first_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_10_labels/middle_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_10_labels/middle_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_10_labels/last_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_10_labels/last_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_10_labels/not-found_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ 
Labels_Get/with_10_labels/not-found_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_30_labels/first_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_30_labels/first_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_30_labels/middle_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_30_labels/middle_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_30_labels/last_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_30_labels/last_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_30_labels/not-found_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_30_labels/not-found_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Equals/equal-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Equals/not_equal-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Equals/different_sizes-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Equals/lots-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Equals/real_long_equal-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Equals/real_long_different_end-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Compare/equal-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Compare/not_equal-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Compare/different_sizes-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Compare/lots-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Compare/real_long_equal-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Compare/real_long_different_end-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Hash/typical_labels_under_1KB-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Hash/bigger_labels_over_1KB-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Hash/extremely_large_label_value_10MB-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Builder-20 224.0 ± 0% 192.0 ± 0% -14.29% (p=0.000 n=10) Labels_Copy-20 224.0 ± 0% 192.0 ± 0% -14.29% 
(p=0.000 n=10) geomean ² -0.73% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ main.txt │ new1.txt │ │ allocs/op │ allocs/op vs base │ String-20 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_5_labels/first_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_5_labels/first_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_5_labels/middle_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_5_labels/middle_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_5_labels/last_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_5_labels/last_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_5_labels/not-found_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_5_labels/not-found_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_10_labels/first_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_10_labels/first_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_10_labels/middle_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_10_labels/middle_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_10_labels/last_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_10_labels/last_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_10_labels/not-found_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_10_labels/not-found_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_30_labels/first_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_30_labels/first_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_30_labels/middle_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_30_labels/middle_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_30_labels/last_label/get-20 0.000 ± 0% 0.000 
± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_30_labels/last_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_30_labels/not-found_label/get-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Get/with_30_labels/not-found_label/has-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Equals/equal-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Equals/not_equal-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Equals/different_sizes-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Equals/lots-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Equals/real_long_equal-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Equals/real_long_different_end-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Compare/equal-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Compare/not_equal-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Compare/different_sizes-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Compare/lots-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Compare/real_long_equal-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Compare/real_long_different_end-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Hash/typical_labels_under_1KB-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Hash/bigger_labels_over_1KB-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Hash/extremely_large_label_value_10MB-20 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Builder-20 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ Labels_Copy-20 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean Signed-off-by: Lukasz Mierzwa --- model/labels/labels_stringlabels.go | 169 ++++++++++++++++++++++------ 1 file changed, 133 insertions(+), 36 deletions(-) diff --git a/model/labels/labels_stringlabels.go b/model/labels/labels_stringlabels.go index f49ed96f65..ed4868469f 100644 --- a/model/labels/labels_stringlabels.go +++ b/model/labels/labels_stringlabels.go @@ -23,6 +23,39 @@ import ( 
"github.com/cespare/xxhash/v2" ) +// List of labels that should be mapped to a single byte value. +// Obviously can't have more than 256 here. +var mappedLabels = []string{ + // Empty string, this must be present here. + "", + // These label names are always present on every time series. + "__name__", + "instance", + "job", + // Common label names. + "code", + "handler", + "quantile", + // Meta metric names injected by Prometheus itself. + "scrape_body_size_bytes", + "scrape_duration_seconds", + "scrape_sample_limit", + "scrape_samples_post_metric_relabeling", + "scrape_samples_scraped", + "scrape_series_added", + "scrape_timeout_seconds", + // Common metric names from client libraries. + "process_cpu_seconds_total", + "process_max_fds", + "process_network_receive_bytes_total", + "process_network_transmit_bytes_total", + "process_open_fds", + "process_resident_memory_bytes", + "process_start_time_seconds ", + "process_virtual_memory_bytes", + "process_virtual_memory_max_bytes", +} + // Labels is implemented by a single flat string holding name/value pairs. // Each name and value is preceded by its length in varint encoding. // Names are in order. @@ -30,12 +63,15 @@ type Labels struct { data string } -func decodeSize(data string, index int) (int, int) { +func decodeSize(data string, index int) (int, int, bool) { // Fast-path for common case of a single byte, value 0..127. 
b := data[index] index++ + if b == 0 { + return 1, index, true + } if b < 0x80 { - return int(b), index + return int(b), index, false } size := int(b & 0x7F) for shift := uint(7); ; shift += 7 { @@ -48,15 +84,28 @@ func decodeSize(data string, index int) (int, int) { break } } - return size, index + return size, index, false } func decodeString(data string, index int) (string, int) { var size int - size, index = decodeSize(data, index) + var mapped bool + size, index, mapped = decodeSize(data, index) + if mapped { + b := data[index] + return mappedLabels[int(b)], index + size + } return data[index : index+size], index + size } +func encodeShortString(s string) (int, byte) { + i := slices.Index(mappedLabels, s) + if i >= 0 { + return 0, byte(i) + } + return len(s), 0 +} + // Bytes returns ls as a byte slice. // It uses non-printing characters and so should not be used for printing. func (ls Labels) Bytes(buf []byte) []byte { @@ -197,23 +246,37 @@ func (ls Labels) Get(name string) string { return "" // Prometheus does not store blank label names. } for i := 0; i < len(ls.data); { - var size int - size, i = decodeSize(ls.data, i) - if ls.data[i] == name[0] { - lName := ls.data[i : i+size] - i += size + var size, next int + var mapped bool + var lName, lValue string + size, next, mapped = decodeSize(ls.data, i) // Read the key index and size. + if mapped { // Key is a mapped string, so decode it fully and move i to the value index. + lName, i = decodeString(ls.data, i) if lName == name { - lValue, _ := decodeString(ls.data, i) + lValue, _ = decodeString(ls.data, i) return lValue } - } else { - if ls.data[i] > name[0] { // Stop looking if we've gone past. + if lName[0] > name[0] { // Stop looking if we've gone past. break } - i += size + } else { // Value is stored raw in the data string. + i = next // Move index to the start of the key string. 
+ if ls.data[i] == name[0] { + lName = ls.data[i : i+size] + i += size // We got the key string, move the index to the start of the value. + if lName == name { + lValue, _ := decodeString(ls.data, i) + return lValue + } + } else { + if ls.data[i] > name[0] { // Stop looking if we've gone past. + break + } + i += size + } } - size, i = decodeSize(ls.data, i) - i += size + size, i, _ = decodeSize(ls.data, i) // Read the value index and size. + i += size // move the index past the value so we can read the next key. } return "" } @@ -224,21 +287,33 @@ func (ls Labels) Has(name string) bool { return false // Prometheus does not store blank label names. } for i := 0; i < len(ls.data); { - var size int - size, i = decodeSize(ls.data, i) - if ls.data[i] == name[0] { - lName := ls.data[i : i+size] - i += size + var size, next int + var mapped bool + var lName string + size, next, mapped = decodeSize(ls.data, i) + if mapped { + lName, i = decodeString(ls.data, i) if lName == name { return true } - } else { - if ls.data[i] > name[0] { // Stop looking if we've gone past. + if lName[0] > name[0] { // Stop looking if we've gone past. break } + } else { + i = next + if ls.data[i] == name[0] { + lName = ls.data[i : i+size] + if lName == name { + return true + } + } else { + if ls.data[i] > name[0] { // Stop looking if we've gone past. + break + } + } i += size } - size, i = decodeSize(ls.data, i) + size, i, _ = decodeSize(ls.data, i) i += size } return false @@ -356,10 +431,10 @@ func Compare(a, b Labels) int { // Now we know that there is some difference before the end of a and b. // Go back through the fields and find which field that difference is in. firstCharDifferent, i := i, 0 - size, nextI := decodeSize(a.data, i) + size, nextI, _ := decodeSize(a.data, i) for nextI+size <= firstCharDifferent { i = nextI + size - size, nextI = decodeSize(a.data, i) + size, nextI, _ = decodeSize(a.data, i) } // Difference is inside this entry. 
aStr, _ := decodeString(a.data, i) @@ -385,9 +460,9 @@ func (ls Labels) Len() int { count := 0 for i := 0; i < len(ls.data); { var size int - size, i = decodeSize(ls.data, i) + size, i, _ = decodeSize(ls.data, i) i += size - size, i = decodeSize(ls.data, i) + size, i, _ = decodeSize(ls.data, i) i += size count++ } @@ -422,7 +497,7 @@ func (ls Labels) Validate(f func(l Label) error) error { func (ls Labels) DropMetricName() Labels { for i := 0; i < len(ls.data); { lName, i2 := decodeString(ls.data, i) - size, i2 := decodeSize(ls.data, i2) + size, i2, _ := decodeSize(ls.data, i2) i2 += size if lName == MetricName { if i == 0 { // Make common case fast with no allocations. @@ -518,12 +593,27 @@ func marshalLabelsToSizedBuffer(lbls []Label, data []byte) int { func marshalLabelToSizedBuffer(m *Label, data []byte) int { i := len(data) - i -= len(m.Value) - copy(data[i:], m.Value) - i = encodeSize(data, i, len(m.Value)) - i -= len(m.Name) - copy(data[i:], m.Name) - i = encodeSize(data, i, len(m.Name)) + + size, b := encodeShortString(m.Value) + if size == 0 { + i-- + data[i] = b + } else { + i -= size + copy(data[i:], m.Value) + } + i = encodeSize(data, i, size) + + size, b = encodeShortString(m.Name) + if size == 0 { + i-- + data[i] = b + } else { + i -= size + copy(data[i:], m.Name) + } + i = encodeSize(data, i, size) + return len(data) - i } @@ -581,9 +671,16 @@ func labelsSize(lbls []Label) (n int) { func labelSize(m *Label) (n int) { // strings are encoded as length followed by contents. 
- l := len(m.Name) + l, _ := encodeShortString(m.Name) + if l == 0 { + l++ + } n += l + sizeVarint(uint64(l)) - l = len(m.Value) + + l, _ = encodeShortString(m.Value) + if l == 0 { + l++ + } n += l + sizeVarint(uint64(l)) return n } From fb32c770c738442b2b19975123131ec7f8ca5482 Mon Sep 17 00:00:00 2001 From: Lukasz Mierzwa Date: Fri, 7 Feb 2025 15:46:49 +0000 Subject: [PATCH 2/7] Update collision hashes for stringlabels Signed-off-by: Lukasz Mierzwa --- tsdb/agent/series_test.go | 4 ++-- tsdb/head_test.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tsdb/agent/series_test.go b/tsdb/agent/series_test.go index bc5a4af5d3..85387d03a9 100644 --- a/tsdb/agent/series_test.go +++ b/tsdb/agent/series_test.go @@ -82,8 +82,8 @@ func labelsWithHashCollision() (labels.Labels, labels.Labels) { if ls1.Hash() != ls2.Hash() { // These ones are the same when using -tags stringlabels - ls1 = labels.FromStrings("__name__", "metric", "lbl", "HFnEaGl") - ls2 = labels.FromStrings("__name__", "metric", "lbl", "RqcXatm") + ls1 = labels.FromStrings("__name__", "metric", "lbl", "D3opXYk") + ls2 = labels.FromStrings("__name__", "metric", "lbl", "G1__3.m") } if ls1.Hash() != ls2.Hash() { diff --git a/tsdb/head_test.go b/tsdb/head_test.go index e498578c10..66ec1962b1 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -6176,8 +6176,8 @@ func labelsWithHashCollision() (labels.Labels, labels.Labels) { if ls1.Hash() != ls2.Hash() { // These ones are the same when using -tags stringlabels - ls1 = labels.FromStrings("__name__", "metric", "lbl", "HFnEaGl") - ls2 = labels.FromStrings("__name__", "metric", "lbl", "RqcXatm") + ls1 = labels.FromStrings("__name__", "metric", "lbl", "D3opXYk") + ls2 = labels.FromStrings("__name__", "metric", "lbl", "G1__3.m") } if ls1.Hash() != ls2.Hash() { From d80fe563719a26ca264b034a470bbac5d47eeadb Mon Sep 17 00:00:00 2001 From: Lukasz Mierzwa Date: Mon, 10 Feb 2025 10:19:26 +0000 Subject: [PATCH 3/7] Update list of hardcoded labels 
Signed-off-by: Lukasz Mierzwa --- model/labels/labels_stringlabels.go | 59 +++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/model/labels/labels_stringlabels.go b/model/labels/labels_stringlabels.go index ed4868469f..bc59a4196f 100644 --- a/model/labels/labels_stringlabels.go +++ b/model/labels/labels_stringlabels.go @@ -25,14 +25,15 @@ import ( // List of labels that should be mapped to a single byte value. // Obviously can't have more than 256 here. -var mappedLabels = []string{ +var mappedLabels = [256]string{ // Empty string, this must be present here. "", // These label names are always present on every time series. - "__name__", - "instance", + MetricName, + InstanceName, "job", // Common label names. + BucketLabel, "code", "handler", "quantile", @@ -54,6 +55,56 @@ var mappedLabels = []string{ "process_start_time_seconds ", "process_virtual_memory_bytes", "process_virtual_memory_max_bytes", + // client_go specific metrics + "go_gc_heap_frees_by_size_bytes_bucket", + "go_gc_heap_allocs_by_size_bytes_bucket", + "net_conntrack_dialer_conn_failed_total", + "go_sched_pauses_total_other_seconds_bucket", + "go_sched_pauses_total_gc_seconds_bucket", + "go_sched_pauses_stopping_other_seconds_bucket", + "go_sched_pauses_stopping_gc_seconds_bucket", + "go_sched_latencies_seconds_bucket", + "go_gc_pauses_seconds_bucket", + "go_gc_duration_seconds", + // node_exporter metrics + "node_cpu_seconds_total", + "node_scrape_collector_success", + "node_scrape_collector_duration_seconds", + "node_cpu_scaling_governor", + "node_cpu_guest_seconds_total", + "node_hwmon_temp_celsius", + "node_hwmon_sensor_label", + "node_hwmon_temp_max_celsius", + "node_cooling_device_max_state", + "node_cooling_device_cur_state", + "node_softnet_times_squeezed_total", + "node_softnet_received_rps_total", + "node_softnet_processed_total", + "node_softnet_flow_limit_count_total", + "node_softnet_dropped_total", + "node_softnet_cpu_collision_total", + 
"node_softnet_backlog_len", + "node_schedstat_waiting_seconds_total", + "node_schedstat_timeslices_total", + "node_schedstat_running_seconds_total", + "node_cpu_scaling_frequency_min_hertz", + "node_cpu_scaling_frequency_max_hertz", + "node_cpu_scaling_frequency_hertz", + "node_cpu_frequency_min_hertz", + "node_cpu_frequency_max_hertz", + "node_hwmon_temp_crit_celsius", + "node_hwmon_temp_crit_alarm_celsius", + "node_cpu_core_throttles_total", + "node_thermal_zone_temp", + "node_hwmon_temp_min_celsius", + "node_hwmon_chip_names", + "node_filesystem_readonly", + "node_filesystem_device_error", + "node_filesystem_size_bytes", + "node_filesystem_free_bytes", + "node_filesystem_files_free", + "node_filesystem_files", + "node_filesystem_avail_bytes", } // Labels is implemented by a single flat string holding name/value pairs. @@ -99,7 +150,7 @@ func decodeString(data string, index int) (string, int) { } func encodeShortString(s string) (int, byte) { - i := slices.Index(mappedLabels, s) + i := slices.Index(mappedLabels[:], s) if i >= 0 { return 0, byte(i) } From b07a1318296140b9fb9f7a7fdc2469d4a15f163b Mon Sep 17 00:00:00 2001 From: Lukasz Mierzwa Date: Fri, 14 Feb 2025 11:11:18 +0000 Subject: [PATCH 4/7] Use a slice instead of an array Signed-off-by: Lukasz Mierzwa --- model/labels/labels_stringlabels.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/model/labels/labels_stringlabels.go b/model/labels/labels_stringlabels.go index bc59a4196f..7106aebc55 100644 --- a/model/labels/labels_stringlabels.go +++ b/model/labels/labels_stringlabels.go @@ -25,7 +25,7 @@ import ( // List of labels that should be mapped to a single byte value. // Obviously can't have more than 256 here. -var mappedLabels = [256]string{ +var mappedLabels = []string{ // Empty string, this must be present here. "", // These label names are always present on every time series. 
@@ -118,7 +118,7 @@ func decodeSize(data string, index int) (int, int, bool) { // Fast-path for common case of a single byte, value 0..127. b := data[index] index++ - if b == 0 { + if b == 0x0 { return 1, index, true } if b < 0x80 { @@ -150,7 +150,7 @@ func decodeString(data string, index int) (string, int) { } func encodeShortString(s string) (int, byte) { - i := slices.Index(mappedLabels[:], s) + i := slices.Index(mappedLabels, s) if i >= 0 { return 0, byte(i) } From 24dd39bc578ddc81fcc3214cb747b5edc8111e3e Mon Sep 17 00:00:00 2001 From: Lukasz Mierzwa Date: Tue, 18 Feb 2025 12:06:22 +0000 Subject: [PATCH 5/7] Automatically select common strings to map from last tsdb block This will populate the static mapping of strings to store as a single byte on startup. We use the last TSDB block as the source of data, iterate the index for each label and count how many time series each label pair references. Signed-off-by: Lukasz Mierzwa --- cmd/prometheus/labels.go | 26 +++++ cmd/prometheus/labels_stringlabels.go | 137 ++++++++++++++++++++++++++ cmd/prometheus/main.go | 4 + model/labels/labels_stringlabels.go | 6 +- 4 files changed, 170 insertions(+), 3 deletions(-) create mode 100644 cmd/prometheus/labels.go create mode 100644 cmd/prometheus/labels_stringlabels.go diff --git a/cmd/prometheus/labels.go b/cmd/prometheus/labels.go new file mode 100644 index 0000000000..4f6d155295 --- /dev/null +++ b/cmd/prometheus/labels.go @@ -0,0 +1,26 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !stringlabels + +package main + +import ( + "log/slog" + + "github.com/prometheus/prometheus/tsdb" +) + +func mapCommonLabelSymbols(_ *tsdb.DB, _ *slog.Logger) error { + return nil +} diff --git a/cmd/prometheus/labels_stringlabels.go b/cmd/prometheus/labels_stringlabels.go new file mode 100644 index 0000000000..f63e0b896c --- /dev/null +++ b/cmd/prometheus/labels_stringlabels.go @@ -0,0 +1,137 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build stringlabels + +package main + +import ( + "cmp" + "context" + "fmt" + "log/slog" + "slices" + "strings" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/tsdb" + "github.com/prometheus/prometheus/tsdb/index" +) + +// countBlockSymbols reads given block index and counts how many time each string +// occurs on time series labels. 
+func countBlockSymbols(ctx context.Context, block *tsdb.Block) (map[string]int, error) { + names := map[string]int{} + + ir, err := block.Index() + if err != nil { + return names, err + } + + labelNames, err := ir.LabelNames(ctx) + if err != nil { + return names, err + } + + for _, name := range labelNames { + name = strings.Clone(name) + + if _, ok := names[name]; !ok { + names[name] = 0 + } + + values, err := ir.LabelValues(ctx, name) + if err != nil { + return names, err + } + for _, value := range values { + value = strings.Clone(value) + + if _, ok := names[value]; !ok { + names[value] = 0 + } + + p, err := ir.Postings(ctx, name, value) + if err != nil { + return names, err + } + + refs, err := index.ExpandPostings(p) + if err != nil { + return names, err + } + + names[name] += len(refs) + names[value] += len(refs) + } + } + return names, ir.Close() +} + +type labelCost struct { + name string + cost int +} + +// selectBlockStringsToMap takes a block and returns a list of strings that are most commonly +// present on all time series. +// List is sorted starting with the most frequent strings. +func selectBlockStringsToMap(block *tsdb.Block) ([]string, error) { + names, err := countBlockSymbols(context.Background(), block) + if err != nil { + return nil, fmt.Errorf("failed to build list of common strings in block %s: %w", block.Meta().ULID, err) + } + + costs := make([]labelCost, 0, len(names)) + for name, count := range names { + costs = append(costs, labelCost{name: name, cost: (len(name) - 1) * count}) + } + slices.SortFunc(costs, func(a, b labelCost) int { + return cmp.Compare(b.cost, a.cost) + }) + + mappedLabels := make([]string, 0, 256) + mappedLabels = append(mappedLabels, "") // We must always store empty string. 
+ for i, c := range costs { + if i > 254 { + break + } + mappedLabels = append(mappedLabels, c.name) + } + return mappedLabels, nil +} + +func mapCommonLabelSymbols(db *tsdb.DB, logger *slog.Logger) error { + var block *tsdb.Block + for _, b := range db.Blocks() { + if block == nil || b.MaxTime() > block.MaxTime() { + block = b + } + } + if block == nil { + logger.Info("No tsdb blocks found, can't map common label strings") + return nil + } + + logger.Info( + "Finding most common label strings in last block", + slog.String("block", block.String()), + ) + mappedLabels, err := selectBlockStringsToMap(block) + if err != nil { + return err + } + logger.Info("Mapped common label strings", slog.Int("count", len(mappedLabels))) + labels.MappedLabels = mappedLabels + return nil +} diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index d69648d88b..c43e39f927 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -1242,6 +1242,10 @@ func main() { return fmt.Errorf("opening storage failed: %w", err) } + if err = mapCommonLabelSymbols(db, logger); err != nil { + logger.Warn("Failed to map common strings in labels", slog.Any("err", err)) + } + switch fsType := prom_runtime.Statfs(localStoragePath); fsType { case "NFS_SUPER_MAGIC": logger.Warn("This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.", "fs_type", fsType) diff --git a/model/labels/labels_stringlabels.go b/model/labels/labels_stringlabels.go index 7106aebc55..3ec02ba396 100644 --- a/model/labels/labels_stringlabels.go +++ b/model/labels/labels_stringlabels.go @@ -25,7 +25,7 @@ import ( // List of labels that should be mapped to a single byte value. // Obviously can't have more than 256 here. -var mappedLabels = []string{ +var MappedLabels = []string{ // Empty string, this must be present here. 
"", // These label names are always present on every time series. @@ -144,13 +144,13 @@ func decodeString(data string, index int) (string, int) { size, index, mapped = decodeSize(data, index) if mapped { b := data[index] - return mappedLabels[int(b)], index + size + return MappedLabels[int(b)], index + size } return data[index : index+size], index + size } func encodeShortString(s string) (int, byte) { - i := slices.Index(mappedLabels, s) + i := slices.Index(MappedLabels, s) if i >= 0 { return 0, byte(i) } From cf9d3a0db56a605d3730d0f8b88ae4f80ca833ca Mon Sep 17 00:00:00 2001 From: Lukasz Mierzwa Date: Fri, 21 Feb 2025 10:22:26 +0000 Subject: [PATCH 6/7] Add labels.MapLabels() for setting mapped labels This makes building labels faster by having a fast lookup for string->index path via a map. Since we now need to populate both the slice that maps index->string and a map that gives us string->index. For that we add labels.MapLabels() function which handles updating the static mapping. Signed-off-by: Lukasz Mierzwa --- cmd/prometheus/labels_stringlabels.go | 5 +- model/labels/labels_stringlabels.go | 205 +++++++++++++++----------- 2 files changed, 121 insertions(+), 89 deletions(-) diff --git a/cmd/prometheus/labels_stringlabels.go b/cmd/prometheus/labels_stringlabels.go index f63e0b896c..db9a7d9739 100644 --- a/cmd/prometheus/labels_stringlabels.go +++ b/cmd/prometheus/labels_stringlabels.go @@ -101,9 +101,8 @@ func selectBlockStringsToMap(block *tsdb.Block) ([]string, error) { }) mappedLabels := make([]string, 0, 256) - mappedLabels = append(mappedLabels, "") // We must always store empty string. 
for i, c := range costs { - if i > 254 { + if i >= 256 { break } mappedLabels = append(mappedLabels, c.name) @@ -132,6 +131,6 @@ func mapCommonLabelSymbols(db *tsdb.DB, logger *slog.Logger) error { return err } logger.Info("Mapped common label strings", slog.Int("count", len(mappedLabels))) - labels.MappedLabels = mappedLabels + labels.MapLabels(mappedLabels) return nil } diff --git a/model/labels/labels_stringlabels.go b/model/labels/labels_stringlabels.go index 3ec02ba396..c40255108e 100644 --- a/model/labels/labels_stringlabels.go +++ b/model/labels/labels_stringlabels.go @@ -23,88 +23,122 @@ import ( "github.com/cespare/xxhash/v2" ) -// List of labels that should be mapped to a single byte value. -// Obviously can't have more than 256 here. -var MappedLabels = []string{ - // Empty string, this must be present here. - "", - // These label names are always present on every time series. - MetricName, - InstanceName, - "job", - // Common label names. - BucketLabel, - "code", - "handler", - "quantile", - // Meta metric names injected by Prometheus itself. - "scrape_body_size_bytes", - "scrape_duration_seconds", - "scrape_sample_limit", - "scrape_samples_post_metric_relabeling", - "scrape_samples_scraped", - "scrape_series_added", - "scrape_timeout_seconds", - // Common metric names from client libraries. 
- "process_cpu_seconds_total", - "process_max_fds", - "process_network_receive_bytes_total", - "process_network_transmit_bytes_total", - "process_open_fds", - "process_resident_memory_bytes", - "process_start_time_seconds ", - "process_virtual_memory_bytes", - "process_virtual_memory_max_bytes", - // client_go specific metrics - "go_gc_heap_frees_by_size_bytes_bucket", - "go_gc_heap_allocs_by_size_bytes_bucket", - "net_conntrack_dialer_conn_failed_total", - "go_sched_pauses_total_other_seconds_bucket", - "go_sched_pauses_total_gc_seconds_bucket", - "go_sched_pauses_stopping_other_seconds_bucket", - "go_sched_pauses_stopping_gc_seconds_bucket", - "go_sched_latencies_seconds_bucket", - "go_gc_pauses_seconds_bucket", - "go_gc_duration_seconds", - // node_exporter metrics - "node_cpu_seconds_total", - "node_scrape_collector_success", - "node_scrape_collector_duration_seconds", - "node_cpu_scaling_governor", - "node_cpu_guest_seconds_total", - "node_hwmon_temp_celsius", - "node_hwmon_sensor_label", - "node_hwmon_temp_max_celsius", - "node_cooling_device_max_state", - "node_cooling_device_cur_state", - "node_softnet_times_squeezed_total", - "node_softnet_received_rps_total", - "node_softnet_processed_total", - "node_softnet_flow_limit_count_total", - "node_softnet_dropped_total", - "node_softnet_cpu_collision_total", - "node_softnet_backlog_len", - "node_schedstat_waiting_seconds_total", - "node_schedstat_timeslices_total", - "node_schedstat_running_seconds_total", - "node_cpu_scaling_frequency_min_hertz", - "node_cpu_scaling_frequency_max_hertz", - "node_cpu_scaling_frequency_hertz", - "node_cpu_frequency_min_hertz", - "node_cpu_frequency_max_hertz", - "node_hwmon_temp_crit_celsius", - "node_hwmon_temp_crit_alarm_celsius", - "node_cpu_core_throttles_total", - "node_thermal_zone_temp", - "node_hwmon_temp_min_celsius", - "node_hwmon_chip_names", - "node_filesystem_readonly", - "node_filesystem_device_error", - "node_filesystem_size_bytes", - "node_filesystem_free_bytes", 
- "node_filesystem_files_free", - "node_filesystem_files", - "node_filesystem_avail_bytes", +var ( + // List of labels that should be mapped to a single byte value. + // Obviously can't have more than 256 here. + mappedLabels = []string{} + mappedLabelIndex = map[string]byte{} +) + +// MapLabels takes a list of strings that should use a single byte storage +// inside labels, making them use as little memory as possible. +// Since we use a single byte mapping we can only have 256 such strings. +// +// We MUST store empty string ("") as one of the values here and if you +// don't pass it into MapLabels() then it will be injected. +// +// If you pass more than 256 strings then the extra strings will be ignored. +func MapLabels(names []string) { + // We must always store empty string. Push it to the front of the slice if not present. + if !slices.Contains(names, "") { + names = append([]string{""}, names...) + } + + mappedLabels = make([]string, 0, 256) + mappedLabelIndex = make(map[string]byte, 256) + + for i, name := range names { + if i >= 256 { + break + } + mappedLabels = append(mappedLabels, name) + mappedLabelIndex[name] = byte(i) + } +} + +func init() { + names := []string{ + // Empty string, this must be present here. + "", + // These label names are always present on every time series. + MetricName, + InstanceName, + "job", + // Common label names. + BucketLabel, + "code", + "handler", + "quantile", + // Meta metric names injected by Prometheus itself. + "scrape_body_size_bytes", + "scrape_duration_seconds", + "scrape_sample_limit", + "scrape_samples_post_metric_relabeling", + "scrape_samples_scraped", + "scrape_series_added", + "scrape_timeout_seconds", + // Common metric names from client libraries.
+ "process_cpu_seconds_total", + "process_max_fds", + "process_network_receive_bytes_total", + "process_network_transmit_bytes_total", + "process_open_fds", + "process_resident_memory_bytes", + "process_start_time_seconds ", + "process_virtual_memory_bytes", + "process_virtual_memory_max_bytes", + // client_go specific metrics + "go_gc_heap_frees_by_size_bytes_bucket", + "go_gc_heap_allocs_by_size_bytes_bucket", + "net_conntrack_dialer_conn_failed_total", + "go_sched_pauses_total_other_seconds_bucket", + "go_sched_pauses_total_gc_seconds_bucket", + "go_sched_pauses_stopping_other_seconds_bucket", + "go_sched_pauses_stopping_gc_seconds_bucket", + "go_sched_latencies_seconds_bucket", + "go_gc_pauses_seconds_bucket", + "go_gc_duration_seconds", + // node_exporter metrics + "node_cpu_seconds_total", + "node_scrape_collector_success", + "node_scrape_collector_duration_seconds", + "node_cpu_scaling_governor", + "node_cpu_guest_seconds_total", + "node_hwmon_temp_celsius", + "node_hwmon_sensor_label", + "node_hwmon_temp_max_celsius", + "node_cooling_device_max_state", + "node_cooling_device_cur_state", + "node_softnet_times_squeezed_total", + "node_softnet_received_rps_total", + "node_softnet_processed_total", + "node_softnet_flow_limit_count_total", + "node_softnet_dropped_total", + "node_softnet_cpu_collision_total", + "node_softnet_backlog_len", + "node_schedstat_waiting_seconds_total", + "node_schedstat_timeslices_total", + "node_schedstat_running_seconds_total", + "node_cpu_scaling_frequency_min_hertz", + "node_cpu_scaling_frequency_max_hertz", + "node_cpu_scaling_frequency_hertz", + "node_cpu_frequency_min_hertz", + "node_cpu_frequency_max_hertz", + "node_hwmon_temp_crit_celsius", + "node_hwmon_temp_crit_alarm_celsius", + "node_cpu_core_throttles_total", + "node_thermal_zone_temp", + "node_hwmon_temp_min_celsius", + "node_hwmon_chip_names", + "node_filesystem_readonly", + "node_filesystem_device_error", + "node_filesystem_size_bytes", + "node_filesystem_free_bytes", 
+ "node_filesystem_files_free", + "node_filesystem_files", + "node_filesystem_avail_bytes", + } + MapLabels(names) } // Labels is implemented by a single flat string holding name/value pairs. @@ -144,15 +178,14 @@ func decodeString(data string, index int) (string, int) { size, index, mapped = decodeSize(data, index) if mapped { b := data[index] - return MappedLabels[int(b)], index + size + return mappedLabels[int(b)], index + size } return data[index : index+size], index + size } func encodeShortString(s string) (int, byte) { - i := slices.Index(MappedLabels, s) - if i >= 0 { - return 0, byte(i) + if i, ok := mappedLabelIndex[s]; ok { + return 0, i } return len(s), 0 } From cdf4b3ec3a72c98ac6c9b035d71c4330d9653c2b Mon Sep 17 00:00:00 2001 From: Lukasz Mierzwa Date: Tue, 25 Feb 2025 12:43:24 +0000 Subject: [PATCH 7/7] Add a TSDB PreInitFunc hook and move mapCommonLabelSymbols() call there We need to call mapCommonLabelSymbols() once TSDB opens all blocks, but before we start to replay the WAL and populate the HEAD. There doesn't seem to be a way to do this right now, so add a hook we can use for it. Signed-off-by: Lukasz Mierzwa --- cmd/prometheus/main.go | 12 ++++++++---- tsdb/db.go | 9 +++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index c43e39f927..5b05f225cc 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -853,6 +853,12 @@ func main() { cfg.web.Flags = map[string]string{} + cfg.tsdb.PreInitFunc = func(db *tsdb.DB) { + if err = mapCommonLabelSymbols(db, logger); err != nil { + logger.Warn("Failed to map common strings in labels", slog.Any("err", err)) + } + } + // Exclude kingpin default flags to expose only Prometheus ones.
boilerplateFlags := kingpin.New("", "").Version("") for _, f := range a.Model().Flags { @@ -1242,10 +1248,6 @@ func main() { return fmt.Errorf("opening storage failed: %w", err) } - if err = mapCommonLabelSymbols(db, logger); err != nil { - logger.Warn("Failed to map common strings in labels", slog.Any("err", err)) - } - switch fsType := prom_runtime.Statfs(localStoragePath); fsType { case "NFS_SUPER_MAGIC": logger.Warn("This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.", "fs_type", fsType) @@ -1801,6 +1803,7 @@ type tsdbOptions struct { CompactionDelayMaxPercent int EnableOverlappingCompaction bool EnableOOONativeHistograms bool + PreInitFunc tsdb.PreInitFunc } func (opts tsdbOptions) ToTSDBOptions() tsdb.Options { @@ -1825,6 +1828,7 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options { EnableDelayedCompaction: opts.EnableDelayedCompaction, CompactionDelayMaxPercent: opts.CompactionDelayMaxPercent, EnableOverlappingCompaction: opts.EnableOverlappingCompaction, + PreInitFunc: opts.PreInitFunc, } } diff --git a/tsdb/db.go b/tsdb/db.go index 9ab150c5b4..bd02ea4103 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -224,6 +224,9 @@ type Options struct { // PostingsDecoderFactory allows users to customize postings decoders based on BlockMeta. // By default, DefaultPostingsDecoderFactory will be used to create raw posting decoder. PostingsDecoderFactory PostingsDecoderFactory + + // PreInitFunc is a function that will be called before the HEAD is initialized. 
+ PreInitFunc PreInitFunc } type NewCompactorFunc func(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, opts *Options) (Compactor, error) @@ -234,6 +237,8 @@ type BlockQuerierFunc func(b BlockReader, mint, maxt int64) (storage.Querier, er type BlockChunkQuerierFunc func(b BlockReader, mint, maxt int64) (storage.ChunkQuerier, error) +type PreInitFunc func(*DB) + // DB handles reads and writes of time series falling into // a hashed partition of a seriedb. type DB struct { @@ -1011,6 +1016,10 @@ func open(dir string, l *slog.Logger, r prometheus.Registerer, opts *Options, rn minValidTime = inOrderMaxTime } + if db.opts.PreInitFunc != nil { + db.opts.PreInitFunc(db) + } + if initErr := db.head.Init(minValidTime); initErr != nil { db.head.metrics.walCorruptionsTotal.Inc() var e *errLoadWbl