mirror of
https://github.com/prometheus/prometheus.git
synced 2025-03-05 20:59:13 -08:00
Automatically select common strings to map from last tsdb block
This populates, on startup, the static mapping of strings that are stored as a single byte. We use the last TSDB block as the source of data: we iterate the index for each label and count how many time series reference each label pair. Signed-off-by: Lukasz Mierzwa <l.mierzwa@gmail.com>
This commit is contained in:
parent
b07a131829
commit
24dd39bc57
26
cmd/prometheus/labels.go
Normal file
26
cmd/prometheus/labels.go
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
// Copyright 2017 The Prometheus Authors
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
//go:build !stringlabels
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"log/slog"
|
||||||
|
|
||||||
|
"github.com/prometheus/prometheus/tsdb"
|
||||||
|
)
|
||||||
|
|
||||||
|
func mapCommonLabelSymbols(_ *tsdb.DB, _ *slog.Logger) error {
|
||||||
|
return nil
|
||||||
|
}
|
137
cmd/prometheus/labels_stringlabels.go
Normal file
137
cmd/prometheus/labels_stringlabels.go
Normal file
|
@ -0,0 +1,137 @@
|
||||||
|
// Copyright 2017 The Prometheus Authors
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
//go:build stringlabels
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"cmp"
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
|
"slices"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/prometheus/prometheus/model/labels"
|
||||||
|
"github.com/prometheus/prometheus/tsdb"
|
||||||
|
"github.com/prometheus/prometheus/tsdb/index"
|
||||||
|
)
|
||||||
|
|
||||||
|
// countBlockSymbols reads given block index and counts how many time each string
|
||||||
|
// occurs on time series labels.
|
||||||
|
func countBlockSymbols(ctx context.Context, block *tsdb.Block) (map[string]int, error) {
|
||||||
|
names := map[string]int{}
|
||||||
|
|
||||||
|
ir, err := block.Index()
|
||||||
|
if err != nil {
|
||||||
|
return names, err
|
||||||
|
}
|
||||||
|
|
||||||
|
labelNames, err := ir.LabelNames(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return names, err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, name := range labelNames {
|
||||||
|
name = strings.Clone(name)
|
||||||
|
|
||||||
|
if _, ok := names[name]; !ok {
|
||||||
|
names[name] = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
values, err := ir.LabelValues(ctx, name)
|
||||||
|
if err != nil {
|
||||||
|
return names, err
|
||||||
|
}
|
||||||
|
for _, value := range values {
|
||||||
|
value = strings.Clone(value)
|
||||||
|
|
||||||
|
if _, ok := names[value]; !ok {
|
||||||
|
names[value] = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
p, err := ir.Postings(ctx, name, value)
|
||||||
|
if err != nil {
|
||||||
|
return names, err
|
||||||
|
}
|
||||||
|
|
||||||
|
refs, err := index.ExpandPostings(p)
|
||||||
|
if err != nil {
|
||||||
|
return names, err
|
||||||
|
}
|
||||||
|
|
||||||
|
names[name] += len(refs)
|
||||||
|
names[value] += len(refs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return names, ir.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
// labelCost pairs a string found in block labels with the cost attributed to
// it; selectBlockStringsToMap sorts these by cost to pick strings to map.
type labelCost struct {
	// name is the label name or label value string itself.
	name string
	// cost is the score used for ranking; higher values are picked first.
	cost int
}
|
||||||
|
|
||||||
|
// selectBlockStringsToMap takes a block and returns a list of strings that are most commonly
|
||||||
|
// present on all time series.
|
||||||
|
// List is sorted starting with the most frequent strings.
|
||||||
|
func selectBlockStringsToMap(block *tsdb.Block) ([]string, error) {
|
||||||
|
names, err := countBlockSymbols(context.Background(), block)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to build list of common strings in block %s: %w", block.Meta().ULID, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
costs := make([]labelCost, 0, len(names))
|
||||||
|
for name, count := range names {
|
||||||
|
costs = append(costs, labelCost{name: name, cost: (len(name) - 1) * count})
|
||||||
|
}
|
||||||
|
slices.SortFunc(costs, func(a, b labelCost) int {
|
||||||
|
return cmp.Compare(b.cost, a.cost)
|
||||||
|
})
|
||||||
|
|
||||||
|
mappedLabels := make([]string, 0, 256)
|
||||||
|
mappedLabels = append(mappedLabels, "") // We must always store empty string.
|
||||||
|
for i, c := range costs {
|
||||||
|
if i > 254 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
mappedLabels = append(mappedLabels, c.name)
|
||||||
|
}
|
||||||
|
return mappedLabels, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func mapCommonLabelSymbols(db *tsdb.DB, logger *slog.Logger) error {
|
||||||
|
var block *tsdb.Block
|
||||||
|
for _, b := range db.Blocks() {
|
||||||
|
if block == nil || b.MaxTime() > block.MaxTime() {
|
||||||
|
block = b
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if block == nil {
|
||||||
|
logger.Info("No tsdb blocks found, can't map common label strings")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.Info(
|
||||||
|
"Finding most common label strings in last block",
|
||||||
|
slog.String("block", block.String()),
|
||||||
|
)
|
||||||
|
mappedLabels, err := selectBlockStringsToMap(block)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
logger.Info("Mapped common label strings", slog.Int("count", len(mappedLabels)))
|
||||||
|
labels.MappedLabels = mappedLabels
|
||||||
|
return nil
|
||||||
|
}
|
|
@ -1242,6 +1242,10 @@ func main() {
|
||||||
return fmt.Errorf("opening storage failed: %w", err)
|
return fmt.Errorf("opening storage failed: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err = mapCommonLabelSymbols(db, logger); err != nil {
|
||||||
|
logger.Warn("Failed to map common strings in labels", slog.Any("err", err))
|
||||||
|
}
|
||||||
|
|
||||||
switch fsType := prom_runtime.Statfs(localStoragePath); fsType {
|
switch fsType := prom_runtime.Statfs(localStoragePath); fsType {
|
||||||
case "NFS_SUPER_MAGIC":
|
case "NFS_SUPER_MAGIC":
|
||||||
logger.Warn("This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.", "fs_type", fsType)
|
logger.Warn("This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.", "fs_type", fsType)
|
||||||
|
|
|
@ -25,7 +25,7 @@ import (
|
||||||
|
|
||||||
// List of labels that should be mapped to a single byte value.
|
// List of labels that should be mapped to a single byte value.
|
||||||
// Obviously can't have more than 256 here.
|
// Obviously can't have more than 256 here.
|
||||||
var mappedLabels = []string{
|
var MappedLabels = []string{
|
||||||
// Empty string, this must be present here.
|
// Empty string, this must be present here.
|
||||||
"",
|
"",
|
||||||
// These label names are always present on every time series.
|
// These label names are always present on every time series.
|
||||||
|
@ -144,13 +144,13 @@ func decodeString(data string, index int) (string, int) {
|
||||||
size, index, mapped = decodeSize(data, index)
|
size, index, mapped = decodeSize(data, index)
|
||||||
if mapped {
|
if mapped {
|
||||||
b := data[index]
|
b := data[index]
|
||||||
return mappedLabels[int(b)], index + size
|
return MappedLabels[int(b)], index + size
|
||||||
}
|
}
|
||||||
return data[index : index+size], index + size
|
return data[index : index+size], index + size
|
||||||
}
|
}
|
||||||
|
|
||||||
func encodeShortString(s string) (int, byte) {
|
func encodeShortString(s string) (int, byte) {
|
||||||
i := slices.Index(mappedLabels, s)
|
i := slices.Index(MappedLabels, s)
|
||||||
if i >= 0 {
|
if i >= 0 {
|
||||||
return 0, byte(i)
|
return 0, byte(i)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue