diff --git a/cmd/prometheus/labels.go b/cmd/prometheus/labels.go
new file mode 100644
index 0000000000..4f6d155295
--- /dev/null
+++ b/cmd/prometheus/labels.go
@@ -0,0 +1,28 @@
+// Copyright 2017 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build !stringlabels
+
+package main
+
+import (
+	"log/slog"
+
+	"github.com/prometheus/prometheus/tsdb"
+)
+
+// mapCommonLabelSymbols is a no-op in builds without the stringlabels tag:
+// both arguments are ignored and the returned error is always nil.
+func mapCommonLabelSymbols(_ *tsdb.DB, _ *slog.Logger) error {
+	return nil
+}
diff --git a/cmd/prometheus/labels_stringlabels.go b/cmd/prometheus/labels_stringlabels.go
new file mode 100644
index 0000000000..f63e0b896c
--- /dev/null
+++ b/cmd/prometheus/labels_stringlabels.go
@@ -0,0 +1,172 @@
+// Copyright 2017 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build stringlabels
+
+package main
+
+import (
+	"cmp"
+	"context"
+	"errors"
+	"fmt"
+	"log/slog"
+	"slices"
+	"strings"
+
+	"github.com/prometheus/prometheus/model/labels"
+	"github.com/prometheus/prometheus/tsdb"
+	"github.com/prometheus/prometheus/tsdb/index"
+)
+
+// maxMappedLabels is the upper bound on the number of mapped strings: each one
+// is addressed by a single byte, so there can't be more than 256 of them.
+const maxMappedLabels = 256
+
+// countBlockSymbols reads the index of the given block and counts how many
+// times each string occurs on time series labels, as a label name or as a
+// label value.
+//
+// The index reader is closed on every return path. The returned map is nil
+// if reading the index failed.
+func countBlockSymbols(ctx context.Context, block *tsdb.Block) (_ map[string]int, err error) {
+	ir, err := block.Index()
+	if err != nil {
+		return nil, err
+	}
+	defer func() {
+		// Report a failed Close together with any earlier error.
+		err = errors.Join(err, ir.Close())
+	}()
+
+	labelNames, err := ir.LabelNames(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	names := make(map[string]int, len(labelNames))
+	for _, name := range labelNames {
+		// NOTE(review): strings are cloned before they become map keys,
+		// presumably so the map does not keep referencing memory owned by
+		// the index reader - confirm before removing.
+		name = strings.Clone(name)
+
+		// Make sure the name is counted even if it turns out to have no values.
+		if _, ok := names[name]; !ok {
+			names[name] = 0
+		}
+
+		values, err := ir.LabelValues(ctx, name)
+		if err != nil {
+			return nil, err
+		}
+		for _, value := range values {
+			value = strings.Clone(value)
+
+			p, err := ir.Postings(ctx, name, value)
+			if err != nil {
+				return nil, err
+			}
+
+			refs, err := index.ExpandPostings(p)
+			if err != nil {
+				return nil, err
+			}
+
+			// Credit both strings once per series reference returned for this
+			// name/value pair.
+			names[name] += len(refs)
+			names[value] += len(refs)
+		}
+	}
+	return names, nil
+}
+
+// labelCost pairs a string with the estimated gain from mapping it.
+type labelCost struct {
+	name string
+	cost int
+}
+
+// selectBlockStringsToMap takes a block and returns the list of strings that
+// are estimated to be the most valuable to map, based on how long they are and
+// how often they occur on time series in that block.
+// The list starts with the empty string, followed by at most 255 strings
+// sorted by decreasing cost. Ties are broken by name so the result does not
+// depend on map iteration order.
+func selectBlockStringsToMap(block *tsdb.Block) ([]string, error) {
+	names, err := countBlockSymbols(context.Background(), block)
+	if err != nil {
+		return nil, fmt.Errorf("failed to build list of common strings in block %s: %w", block.Meta().ULID, err)
+	}
+
+	costs := make([]labelCost, 0, len(names))
+	for name, count := range names {
+		cost := (len(name) - 1) * count
+		// A cost of zero or less means there is nothing to gain by this
+		// estimate. Skipping these also prevents the empty string from being
+		// listed a second time.
+		if cost <= 0 {
+			continue
+		}
+		costs = append(costs, labelCost{name: name, cost: cost})
+	}
+	slices.SortFunc(costs, func(a, b labelCost) int {
+		if c := cmp.Compare(b.cost, a.cost); c != 0 {
+			return c
+		}
+		return strings.Compare(a.name, b.name)
+	})
+
+	mappedLabels := make([]string, 0, maxMappedLabels)
+	mappedLabels = append(mappedLabels, "") // We must always store empty string.
+	for _, c := range costs[:min(len(costs), maxMappedLabels-1)] {
+		mappedLabels = append(mappedLabels, c.name)
+	}
+	return mappedLabels, nil
+}
+
+// mapCommonLabelSymbols picks the block of db with the highest MaxTime,
+// selects the strings to map from it and stores them in labels.MappedLabels.
+// If db has no blocks, it logs that and returns nil without changing the
+// mapping.
+//
+// NOTE(review): labels.MappedLabels is replaced after the storage has been
+// opened, and decodeString indexes that slice with a byte stored in encoded
+// labels. Labels encoded before this call would be decoded against the new
+// table - confirm that none exist yet (e.g. from WAL replay) and that
+// nothing reads the slice concurrently.
+func mapCommonLabelSymbols(db *tsdb.DB, logger *slog.Logger) error {
+	var block *tsdb.Block
+	for _, b := range db.Blocks() {
+		if block == nil || b.MaxTime() > block.MaxTime() {
+			block = b
+		}
+	}
+	if block == nil {
+		logger.Info("No tsdb blocks found, can't map common label strings")
+		return nil
+	}
+
+	logger.Info(
+		"Finding most common label strings in last block",
+		slog.String("block", block.String()),
+	)
+	mappedLabels, err := selectBlockStringsToMap(block)
+	if err != nil {
+		return err
+	}
+	logger.Info("Mapped common label strings", slog.Int("count", len(mappedLabels)))
+	labels.MappedLabels = mappedLabels
+	return nil
+}
diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go
index d69648d88b..c43e39f927 100644
--- a/cmd/prometheus/main.go
+++ b/cmd/prometheus/main.go
@@ -1242,6 +1242,10 @@ func main() {
 					return fmt.Errorf("opening storage failed: %w", err)
 				}
 
+				if err = mapCommonLabelSymbols(db, logger); err != nil {
+					logger.Warn("Failed to map common strings in labels", slog.Any("err", err))
+				}
+
 				switch fsType := prom_runtime.Statfs(localStoragePath); fsType {
 				case "NFS_SUPER_MAGIC":
 					logger.Warn("This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.", "fs_type", fsType)
diff --git a/model/labels/labels_stringlabels.go b/model/labels/labels_stringlabels.go
index 7106aebc55..3ec02ba396 100644
--- a/model/labels/labels_stringlabels.go
+++ b/model/labels/labels_stringlabels.go
@@ -25,7 +25,7 @@ import (
 
 // List of labels that should be mapped to a single byte value.
 // Obviously can't have more than 256 here.
-var mappedLabels = []string{
+var MappedLabels = []string{
 	// Empty string, this must be present here.
 	"",
 	// These label names are always present on every time series.
@@ -144,13 +144,13 @@ func decodeString(data string, index int) (string, int) {
 	size, index, mapped = decodeSize(data, index)
 	if mapped {
 		b := data[index]
-		return mappedLabels[int(b)], index + size
+		return MappedLabels[int(b)], index + size
 	}
 	return data[index : index+size], index + size
 }
 
 func encodeShortString(s string) (int, byte) {
-	i := slices.Index(mappedLabels, s)
+	i := slices.Index(MappedLabels, s)
 	if i >= 0 {
 		return 0, byte(i)
 	}