From 24dd39bc578ddc81fcc3214cb747b5edc8111e3e Mon Sep 17 00:00:00 2001 From: Lukasz Mierzwa Date: Tue, 18 Feb 2025 12:06:22 +0000 Subject: [PATCH] Automatically select common strings to map from last tsdb block On startup, this populates the static mapping of strings that are stored as a single byte. We use the last TSDB block as the source of data, iterate over the index for each label pair and count how many time series reference it. Signed-off-by: Lukasz Mierzwa --- cmd/prometheus/labels.go | 26 +++++ cmd/prometheus/labels_stringlabels.go | 137 ++++++++++++++++++++++++++ cmd/prometheus/main.go | 4 + model/labels/labels_stringlabels.go | 6 +- 4 files changed, 170 insertions(+), 3 deletions(-) create mode 100644 cmd/prometheus/labels.go create mode 100644 cmd/prometheus/labels_stringlabels.go diff --git a/cmd/prometheus/labels.go b/cmd/prometheus/labels.go new file mode 100644 index 0000000000..4f6d155295 --- /dev/null +++ b/cmd/prometheus/labels.go @@ -0,0 +1,26 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +//go:build !stringlabels + +package main + +import ( + "log/slog" + + "github.com/prometheus/prometheus/tsdb" +) + +func mapCommonLabelSymbols(_ *tsdb.DB, _ *slog.Logger) error { + return nil +} diff --git a/cmd/prometheus/labels_stringlabels.go b/cmd/prometheus/labels_stringlabels.go new file mode 100644 index 0000000000..f63e0b896c --- /dev/null +++ b/cmd/prometheus/labels_stringlabels.go @@ -0,0 +1,137 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build stringlabels + +package main + +import ( + "cmp" + "context" + "fmt" + "log/slog" + "slices" + "strings" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/tsdb" + "github.com/prometheus/prometheus/tsdb/index" +) + +// countBlockSymbols reads the given block's index and counts how many times each string +// occurs in the labels of its time series.
+func countBlockSymbols(ctx context.Context, block *tsdb.Block) (names map[string]int, err error) {
+	ir, err := block.Index()
+	if err != nil {
+		return nil, err
+	}
+	// Close the index reader on every return path. A Close error is surfaced
+	// only when nothing failed earlier, so the first error always wins.
+	defer func() {
+		if cerr := ir.Close(); err == nil {
+			err = cerr
+		}
+	}()
+
+	labelNames, err := ir.LabelNames(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	names = make(map[string]int, len(labelNames))
+	for _, name := range labelNames {
+		name = strings.Clone(name)
+		if _, ok := names[name]; !ok {
+			names[name] = 0
+		}
+
+		values, err := ir.LabelValues(ctx, name)
+		if err != nil {
+			return nil, err
+		}
+		for _, value := range values {
+			value = strings.Clone(value)
+			p, err := ir.Postings(ctx, name, value)
+			if err != nil {
+				return nil, err
+			}
+			refs, err := index.ExpandPostings(p)
+			if err != nil {
+				return nil, err
+			}
+			// len(refs) is the number of series referenced by this name/value pair.
+			names[name] += len(refs)
+			names[value] += len(refs)
+		}
+	}
+	return names, nil
+}
+
+type labelCost struct {
+	name string
+	cost int
+}
+
+// selectBlockStringsToMap returns "" followed by up to 255 strings from the block,
+// ordered by decreasing (len(s)-1)*count, i.e. presumably the bytes saved by storing
+// one byte instead of s. Equal costs are ordered by name so the result is stable.
+func selectBlockStringsToMap(block *tsdb.Block) ([]string, error) {
+	names, err := countBlockSymbols(context.Background(), block)
+	if err != nil {
+		return nil, fmt.Errorf("failed to build list of common strings in block %s: %w", block.Meta().ULID, err)
+	}
+
+	costs := make([]labelCost, 0, len(names))
+	for name, count := range names {
+		costs = append(costs, labelCost{name: name, cost: (len(name) - 1) * count})
+	}
+	slices.SortFunc(costs, func(a, b labelCost) int {
+		if c := cmp.Compare(b.cost, a.cost); c != 0 {
+			return c
+		}
+		return cmp.Compare(a.name, b.name)
+	})
+
+	mappedLabels := make([]string, 0, 256)
+	mappedLabels = append(mappedLabels, "") // We must always store empty string.
+	for _, c := range costs[:min(len(costs), 255)] {
+		mappedLabels = append(mappedLabels, c.name)
+	}
+	return mappedLabels, nil
+}
+
+// mapCommonLabelSymbols replaces labels.MappedLabels with the strings selected from
+// the block with the highest MaxTime; it does nothing when the DB has no blocks.
+// NOTE(review): labels encoded before this swap hold indexes into the previous
+// table; confirm none are alive or read concurrently at this point — TODO.
+func mapCommonLabelSymbols(db *tsdb.DB, logger *slog.Logger) error {
+	var block *tsdb.Block
+	for _, b := range db.Blocks() {
+		if block == nil || b.MaxTime() > block.MaxTime() {
+			block = b
+		}
+	}
+	if block == nil {
+		logger.Info("No tsdb blocks found, can't map common label strings")
+		return nil
+	}
+
+	logger.Info("Finding most common label strings in last block", slog.String("block", block.String()))
+	mappedLabels, err := selectBlockStringsToMap(block)
+	if err != nil {
+		return err
+	}
+	logger.Info("Mapped common label strings", slog.Int("count", len(mappedLabels)))
+	labels.MappedLabels = mappedLabels
+	return nil
+}
diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index d69648d88b..c43e39f927 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -1242,6 +1242,10 @@ func main() { return fmt.Errorf("opening storage failed: %w", err) } + if err = mapCommonLabelSymbols(db, logger); err != nil { + logger.Warn("Failed to map common strings in labels", slog.Any("err", err)) + } + switch fsType := prom_runtime.Statfs(localStoragePath); fsType { case "NFS_SUPER_MAGIC": logger.Warn("This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.", "fs_type", fsType) diff --git a/model/labels/labels_stringlabels.go b/model/labels/labels_stringlabels.go index 7106aebc55..3ec02ba396 100644 --- a/model/labels/labels_stringlabels.go +++ b/model/labels/labels_stringlabels.go @@ -25,7 +25,7 @@ import ( // List of labels that should be mapped to a single byte value. // Obviously can't have more than 256 here. -var mappedLabels = []string{ +var MappedLabels = []string{ // Empty string, this must be present here.
"", // These label names are always present on every time series. @@ -144,13 +144,13 @@ func decodeString(data string, index int) (string, int) { size, index, mapped = decodeSize(data, index) if mapped { b := data[index] - return mappedLabels[int(b)], index + size + return MappedLabels[int(b)], index + size } return data[index : index+size], index + size } func encodeShortString(s string) (int, byte) { - i := slices.Index(mappedLabels, s) + i := slices.Index(MappedLabels, s) if i >= 0 { return 0, byte(i) }