diff --git a/storage/utf8_migration.go b/storage/utf8_migration.go
new file mode 100644
index 000000000..791f66801
--- /dev/null
+++ b/storage/utf8_migration.go
@@ -0,0 +1,316 @@
+package storage
+
+import (
+	"context"
+	"errors"
+	"slices"
+
+	"github.com/prometheus/common/model"
+	"github.com/prometheus/prometheus/model/labels"
+	"github.com/prometheus/prometheus/tsdb/chunkenc"
+	"github.com/prometheus/prometheus/tsdb/chunks"
+	"github.com/prometheus/prometheus/util/annotations"
+	"github.com/prometheus/prometheus/util/strutil"
+)
+
+// errEscapedLabelValuesNotImplemented is returned by the LabelValues methods
+// of the escaped-block queriers, which have no implementation yet.
+var errEscapedLabelValuesNotImplemented = errors.New("label values not implemented for escaped UTF-8 block queriers")
+
+// mixedUTF8BlockQuerier wraps a Querier and, when a query carries an equality
+// matcher on __name__ whose value is not a valid legacy metric name, also runs
+// the query with that name escaped using es. Results found under the escaped
+// name are reported under the name that was originally asked for.
+type mixedUTF8BlockQuerier struct {
+	Querier
+	es model.EscapingScheme
+}
+
+// LabelValues merges the label values found for the matchers as given with
+// those found for their escaped variant, if there is one.
+func (q *mixedUTF8BlockQuerier) LabelValues(ctx context.Context, name string, hints *LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) {
+	return mixedUTF8LabelValues(ctx, q.Querier, q.es, name, hints, matchers)
+}
+
+// LabelNames merges the label names found for the matchers as given with
+// those found for their escaped variant, if there is one.
+func (q *mixedUTF8BlockQuerier) LabelNames(ctx context.Context, hints *LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) {
+	return mixedUTF8LabelNames(ctx, q.Querier, q.es, hints, matchers)
+}
+
+// Select returns the series matching the matchers as given merged with the
+// series matching their escaped variant, the latter renamed back to the
+// requested metric name. Without anything to escape it is a plain pass-through.
+func (q *mixedUTF8BlockQuerier) Select(ctx context.Context, sortSeries bool, hints *SelectHints, matchers ...*labels.Matcher) SeriesSet {
+	newMatchers, escaped, original, ok := escapeUTF8NameMatcher(matchers, q.es)
+	if !ok {
+		return q.Querier.Select(ctx, sortSeries, hints, matchers...)
+	}
+
+	// Both selects are sorted regardless of sortSeries: the merge needs
+	// sorted inputs to work.
+	return NewMergeSeriesSet([]SeriesSet{
+		q.Querier.Select(ctx, true, hints, matchers...),
+		&metricRenameSeriesSet{SeriesSet: q.Querier.Select(ctx, true, hints, newMatchers...), from: escaped, to: original},
+	}, ChainedSeriesMerge)
+}
+
+// mixedUTF8BlockChunkQuerier is the ChunkQuerier counterpart of
+// mixedUTF8BlockQuerier.
+type mixedUTF8BlockChunkQuerier struct {
+	ChunkQuerier
+	es model.EscapingScheme
+}
+
+// LabelValues merges the label values found for the matchers as given with
+// those found for their escaped variant, if there is one.
+func (q *mixedUTF8BlockChunkQuerier) LabelValues(ctx context.Context, name string, hints *LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) {
+	return mixedUTF8LabelValues(ctx, q.ChunkQuerier, q.es, name, hints, matchers)
+}
+
+// LabelNames merges the label names found for the matchers as given with
+// those found for their escaped variant, if there is one.
+func (q *mixedUTF8BlockChunkQuerier) LabelNames(ctx context.Context, hints *LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) {
+	return mixedUTF8LabelNames(ctx, q.ChunkQuerier, q.es, hints, matchers)
+}
+
+// Select returns the chunk series matching the matchers as given merged with
+// the chunk series matching their escaped variant, the latter renamed back to
+// the requested metric name. Without anything to escape it is a plain
+// pass-through.
+func (q *mixedUTF8BlockChunkQuerier) Select(ctx context.Context, sortSeries bool, hints *SelectHints, matchers ...*labels.Matcher) ChunkSeriesSet {
+	newMatchers, escaped, original, ok := escapeUTF8NameMatcher(matchers, q.es)
+	if !ok {
+		return q.ChunkQuerier.Select(ctx, sortSeries, hints, matchers...)
+	}
+
+	// Both selects are sorted regardless of sortSeries: the merge needs
+	// sorted inputs to work.
+	return NewMergeChunkSeriesSet([]ChunkSeriesSet{
+		q.ChunkQuerier.Select(ctx, true, hints, matchers...),
+		&metricRenameChunkSeriesSet{ChunkSeriesSet: q.ChunkQuerier.Select(ctx, true, hints, newMatchers...), from: escaped, to: original},
+	}, NewCompactingChunkSeriesMerger(ChainedSeriesMerge))
+}
+
+// mixedUTF8LabelValues implements LabelValues for both mixed queriers. It
+// queries q with the matchers as given and, if a __name__ matcher needs
+// escaping, once more with the escaped matchers, merging values and
+// annotations. An error from either query is returned without partial results.
+func mixedUTF8LabelValues(ctx context.Context, q LabelQuerier, es model.EscapingScheme, name string, hints *LabelHints, matchers []*labels.Matcher) ([]string, annotations.Annotations, error) {
+	vals, an, err := q.LabelValues(ctx, name, hints, matchers...)
+	if err != nil {
+		return nil, nil, err
+	}
+	if newMatchers, escaped, original, ok := escapeUTF8NameMatcher(matchers, es); ok {
+		vals2, an2, err := q.LabelValues(ctx, name, hints, newMatchers...)
+		if err != nil {
+			return nil, nil, err
+		}
+		if name == model.MetricNameLabel {
+			// Report the escaped metric name under the name that was asked for.
+			for i := range vals2 {
+				if vals2[i] == escaped {
+					vals2[i] = original
+				}
+			}
+		}
+		vals = strutil.MergeStrings(vals, vals2)
+		an = an.Merge(an2)
+	}
+	// An empty value is dropped unless it is the only value.
+	if ix := slices.Index(vals, ""); ix != -1 && len(vals) > 1 {
+		vals = slices.Delete(vals, ix, ix+1)
+	}
+	return vals, an, nil
+}
+
+// mixedUTF8LabelNames implements LabelNames for both mixed queriers. It
+// queries q with the matchers as given and, if a __name__ matcher needs
+// escaping, once more with the escaped matchers, merging names and
+// annotations. An error from either query is returned without partial results.
+func mixedUTF8LabelNames(ctx context.Context, q LabelQuerier, es model.EscapingScheme, hints *LabelHints, matchers []*labels.Matcher) ([]string, annotations.Annotations, error) {
+	names, an, err := q.LabelNames(ctx, hints, matchers...)
+	if err != nil {
+		return nil, nil, err
+	}
+	newMatchers, _, _, ok := escapeUTF8NameMatcher(matchers, es)
+	if !ok {
+		return names, an, nil
+	}
+	names2, an2, err := q.LabelNames(ctx, hints, newMatchers...)
+	if err != nil {
+		return nil, nil, err
+	}
+	return strutil.MergeStrings(names, names2), an.Merge(an2), nil
+}
+
+// escapedUTF8BlockQuerier wraps a Querier and rewrites an equality matcher on
+// __name__ whose value is not a valid legacy metric name to its escaped form
+// (using es) before querying; selected series are renamed back to the name
+// that was asked for.
+type escapedUTF8BlockQuerier struct {
+	Querier
+	es model.EscapingScheme
+}
+
+// LabelValues is not implemented yet and always returns an error.
+func (q *escapedUTF8BlockQuerier) LabelValues(ctx context.Context, name string, hints *LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) {
+	// TODO: implement.
+	return nil, nil, errEscapedLabelValuesNotImplemented
+}
+
+// LabelNames returns the label names for the matchers, with the __name__
+// matcher escaped if needed.
+func (q *escapedUTF8BlockQuerier) LabelNames(ctx context.Context, hints *LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) {
+	return escapedUTF8LabelNames(ctx, q.Querier, q.es, hints, matchers)
+}
+
+// Select returns the series for the matchers, with the __name__ matcher
+// escaped if needed and the resulting series renamed back.
+func (q *escapedUTF8BlockQuerier) Select(ctx context.Context, sortSeries bool, hints *SelectHints, matchers ...*labels.Matcher) SeriesSet {
+	newMatchers, escaped, original, ok := escapeUTF8NameMatcher(matchers, q.es)
+	if !ok {
+		return q.Querier.Select(ctx, sortSeries, hints, matchers...)
+	}
+	return &metricRenameSeriesSet{SeriesSet: q.Querier.Select(ctx, sortSeries, hints, newMatchers...), from: escaped, to: original}
+}
+
+// escapeUTF8NameMatcher looks for equality matchers on __name__ whose value is
+// not a valid legacy metric name and escapes that value using es.
+//
+// If there is at least one, ok is true, newMatchers is a copy of matchers with
+// those matchers replaced by escaped ones, and original and escaped hold the
+// value before and after escaping. Otherwise ok is false and newMatchers is
+// nil, so nothing is allocated when there is nothing to escape.
+func escapeUTF8NameMatcher(matchers []*labels.Matcher, es model.EscapingScheme) (newMatchers []*labels.Matcher, escaped, original string, ok bool) {
+	for i, m := range matchers {
+		if m.Type != labels.MatchEqual || m.Name != model.MetricNameLabel || model.IsValidLegacyMetricName(m.Value) {
+			continue
+		}
+		// TODO: what if we get multiple and different __name__ matchers?
+		// Leaning towards ignoring everything and querying the underlying querier as is. Results will and should be empty.
+		// For now the last such matcher wins for original and escaped.
+		if newMatchers == nil {
+			// Copy the slice on first use so the caller's matchers stay untouched.
+			newMatchers = slices.Clone(matchers)
+		}
+		m2 := *m
+		m2.Value = model.EscapeName(m.Value, es)
+		newMatchers[i] = &m2
+		original, escaped, ok = m.Value, m2.Value, true
+	}
+	return newMatchers, escaped, original, ok
+}
+
+// escapedUTF8BlockChunkQuerier is the ChunkQuerier counterpart of
+// escapedUTF8BlockQuerier.
+type escapedUTF8BlockChunkQuerier struct {
+	ChunkQuerier
+	es model.EscapingScheme
+}
+
+// LabelValues is not implemented yet and always returns an error.
+func (q *escapedUTF8BlockChunkQuerier) LabelValues(ctx context.Context, name string, hints *LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) {
+	// TODO: implement.
+	return nil, nil, errEscapedLabelValuesNotImplemented
+}
+
+// LabelNames returns the label names for the matchers, with the __name__
+// matcher escaped if needed.
+func (q *escapedUTF8BlockChunkQuerier) LabelNames(ctx context.Context, hints *LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) {
+	return escapedUTF8LabelNames(ctx, q.ChunkQuerier, q.es, hints, matchers)
+}
+
+// Select returns the chunk series for the matchers, with the __name__ matcher
+// escaped if needed and the resulting series renamed back.
+func (q *escapedUTF8BlockChunkQuerier) Select(ctx context.Context, sortSeries bool, hints *SelectHints, matchers ...*labels.Matcher) ChunkSeriesSet {
+	newMatchers, escaped, original, ok := escapeUTF8NameMatcher(matchers, q.es)
+	if !ok {
+		return q.ChunkQuerier.Select(ctx, sortSeries, hints, matchers...)
+	}
+	return &metricRenameChunkSeriesSet{ChunkSeriesSet: q.ChunkQuerier.Select(ctx, sortSeries, hints, newMatchers...), from: escaped, to: original}
+}
+
+// escapedUTF8LabelNames implements LabelNames for both escaped queriers: it
+// queries q with the __name__ matcher escaped if needed.
+func escapedUTF8LabelNames(ctx context.Context, q LabelQuerier, es model.EscapingScheme, hints *LabelHints, matchers []*labels.Matcher) ([]string, annotations.Annotations, error) {
+	if newMatchers, _, _, ok := escapeUTF8NameMatcher(matchers, es); ok {
+		matchers = newMatchers
+	}
+	return q.LabelNames(ctx, hints, matchers...)
+}
+
+// renameMetric returns a copy of lset in which a __name__ label with value
+// from has its value replaced by to. All other labels are copied unchanged.
+func renameMetric(lset labels.Labels, from, to string) labels.Labels {
+	b := labels.NewScratchBuilder(lset.Len())
+	lset.Range(func(l labels.Label) {
+		// TODO: what if we don't find the label we need to map? That would be
+		// an important bug, because that would break our assumptions that keep
+		// the series sorted. Panic? Return Next=false and Err= not nil?
+		if l.Name == model.MetricNameLabel && l.Value == from {
+			b.Add(l.Name, to)
+		} else {
+			b.Add(l.Name, l.Value)
+		}
+	})
+	return b.Labels()
+}
+
+// metricRenameSeriesSet wraps a SeriesSet and renames the metric of its series
+// from from to to.
+type metricRenameSeriesSet struct {
+	SeriesSet
+	from, to string
+}
+
+// At returns the current series with its metric name rewritten.
+func (u *metricRenameSeriesSet) At() Series {
+	// Capture the current series once: the returned series must stay
+	// iterable after the set has advanced, so the iterator must not call
+	// At again later.
+	s := u.SeriesSet.At()
+	return &SeriesEntry{
+		Lset: renameMetric(s.Labels(), u.from, u.to),
+		SampleIteratorFn: func(it chunkenc.Iterator) chunkenc.Iterator {
+			return s.Iterator(it)
+		},
+	}
+}
+
+// Warnings returns the warnings of the wrapped set unchanged.
+func (u *metricRenameSeriesSet) Warnings() annotations.Annotations {
+	// Warnings are for the whole set, so no sorting needed. However:
+	// TODO: can a warning be referencing a metric name? Would that be a problem? I think not, but would be confusing.
+	// TODO: should we add a warning about the renaming?
+	return u.SeriesSet.Warnings()
+}
+
+// metricRenameChunkSeriesSet is the ChunkSeriesSet counterpart of
+// metricRenameSeriesSet.
+type metricRenameChunkSeriesSet struct {
+	ChunkSeriesSet
+	from, to string
+}
+
+// At returns the current chunk series with its metric name rewritten.
+func (u *metricRenameChunkSeriesSet) At() ChunkSeries {
+	// Capture the current series once: the returned series must stay
+	// iterable after the set has advanced, so the iterator must not call
+	// At again later.
+	s := u.ChunkSeriesSet.At()
+	return &ChunkSeriesEntry{
+		Lset: renameMetric(s.Labels(), u.from, u.to),
+		ChunkIteratorFn: func(it chunks.Iterator) chunks.Iterator {
+			return s.Iterator(it)
+		},
+	}
+}
+
+// Warnings returns the warnings of the wrapped set unchanged.
+func (u *metricRenameChunkSeriesSet) Warnings() annotations.Annotations {
+	// Warnings are for the whole set, so no sorting needed. However:
+	// TODO: can a warning be referencing a metric name? Would that be a problem? I think not, but would be confusing.
+	// TODO: should we add a warning about the renaming?
+	return u.ChunkSeriesSet.Warnings()
+}
diff --git a/storage/utf8_migration_test.go b/storage/utf8_migration_test.go new file mode 100644 index 000000000..44b72af55 --- /dev/null +++ b/storage/utf8_migration_test.go @@ -0,0 +1,285 @@ +package storage + +// var utf8Data = []seriesSamples{ +// { +// lset: map[string]string{"a": "a", "__name__": "foo.bar"}, +// chunks: [][]sample{ +// {{1, 2, nil, nil}, {2, 3, nil, nil}, {3, 4, nil, nil}}, +// {{5, 2, nil, nil}, {6, 3, nil, nil}, {7, 4, nil, nil}}, +// }, +// }, +// { +// lset: map[string]string{"a": "b", "__name__": "foo.bar"}, +// chunks: [][]sample{ +// {{1, 1, nil, nil}, {2, 2, nil, nil}, {3, 3, nil, nil}}, +// {{5, 3, nil, nil}, {6, 6, nil, nil}}, +// }, +// }, +// { +// lset: map[string]string{"c": "d", "__name__": "baz.qux"}, +// chunks: [][]sample{ +// {{1, 1, nil, nil}, {2, 2, nil, nil}, {3, 3, nil, nil}}, +// {{5, 3, nil, nil}, {6, 6, nil, nil}}, +// }, +// }, +// } + +// var underscoreEscapedUTF8Data = []seriesSamples{ +// { +// lset: map[string]string{"a": "c", "__name__": "foo_bar"}, +// chunks: [][]sample{ +// {{1, 3, nil, nil}, {2, 2, nil, nil}, {3, 6, nil, nil}}, +// {{5, 1, nil, nil}, {6, 7, nil, nil}, {7, 2, nil, nil}}, +// }, +// }, +// { +// lset: map[string]string{"e": "f", "__name__": "baz_qux"}, +// chunks: [][]sample{ +// {{1, 3, nil, nil}, {2, 2, nil, nil}, {3, 6, nil, nil}}, +// {{5,
1, nil, nil}, {6, 7, nil, nil}, {7, 2, nil, nil}}, +// }, +// }, +// { +// lset: map[string]string{"__name__": "another_metric"}, +// chunks: [][]sample{ +// {{1, 41, nil, nil}, {2, 42, nil, nil}, {3, 43, nil, nil}}, +// {{5, 45, nil, nil}, {6, 46, nil, nil}, {7, 47, nil, nil}}, +// }, +// }, +// } + +// var mixedUTF8Data = append(utf8Data, underscoreEscapedUTF8Data...) + +// var s1 = storage.NewListSeries(labels.FromStrings("a", "a", "__name__", "foo.bar"), +// []chunks.Sample{sample{1, 2, nil, nil}, sample{2, 3, nil, nil}, sample{3, 4, nil, nil}, sample{5, 2, nil, nil}, sample{6, 3, nil, nil}, sample{7, 4, nil, nil}}, +// ) +// var c1 = storage.NewListChunkSeriesFromSamples(labels.FromStrings("a", "a", "__name__", "foo.bar"), +// []chunks.Sample{sample{1, 2, nil, nil}, sample{2, 3, nil, nil}, sample{3, 4, nil, nil}}, []chunks.Sample{sample{5, 2, nil, nil}, sample{6, 3, nil, nil}, sample{7, 4, nil, nil}}, +// ) +// var s2 = storage.NewListSeries(labels.FromStrings("a", "b", "__name__", "foo.bar"), +// []chunks.Sample{sample{1, 1, nil, nil}, sample{2, 2, nil, nil}, sample{3, 3, nil, nil}, sample{5, 3, nil, nil}, sample{6, 6, nil, nil}}, +// ) +// var c2 = storage.NewListChunkSeriesFromSamples(labels.FromStrings("a", "b", "__name__", "foo.bar"), +// []chunks.Sample{sample{1, 1, nil, nil}, sample{2, 2, nil, nil}, sample{3, 3, nil, nil}}, []chunks.Sample{sample{5, 3, nil, nil}, sample{6, 6, nil, nil}}, +// ) + +// var s3 = storage.NewListSeries(labels.FromStrings("a", "c", "__name__", "foo_bar"), +// []chunks.Sample{sample{1, 3, nil, nil}, sample{2, 2, nil, nil}, sample{3, 6, nil, nil}, sample{5, 1, nil, nil}, sample{6, 7, nil, nil}, sample{7, 2, nil, nil}}, +// ) +// var c3 = storage.NewListChunkSeriesFromSamples(labels.FromStrings("a", "c", "__name__", "foo_bar"), +// []chunks.Sample{sample{1, 3, nil, nil}, sample{2, 2, nil, nil}, sample{3, 6, nil, nil}}, []chunks.Sample{sample{5, 1, nil, nil}, sample{6, 7, nil, nil}, sample{7, 2, nil, nil}}, +// ) +// var 
s3Unescaped = storage.NewListSeries(labels.FromStrings("a", "c", "__name__", "foo.bar"), +// []chunks.Sample{sample{1, 3, nil, nil}, sample{2, 2, nil, nil}, sample{3, 6, nil, nil}, sample{5, 1, nil, nil}, sample{6, 7, nil, nil}, sample{7, 2, nil, nil}}, +// ) +// var c3Unescaped = storage.NewListChunkSeriesFromSamples(labels.FromStrings("a", "c", "__name__", "foo.bar"), +// []chunks.Sample{sample{1, 3, nil, nil}, sample{2, 2, nil, nil}, sample{3, 6, nil, nil}}, []chunks.Sample{sample{5, 1, nil, nil}, sample{6, 7, nil, nil}, sample{7, 2, nil, nil}}, +// ) + +// func TestMixedUTF8BlockQuerier_Select(t *testing.T) { +// // TODO(npazosmendez): test cases +// // * series with the same label set are combined and their samples are returned in order + +// for _, c := range []querierSelectTestCase{ +// { +// ms: []*labels.Matcher{}, +// exp: newMockSeriesSet([]storage.Series{}), +// expChks: newMockChunkSeriesSet([]storage.ChunkSeries{}), +// }, +// { +// // No __name__= matcher, no-op +// mint: math.MinInt64, +// maxt: math.MaxInt64, +// ms: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "a", ".+")}, +// exp: newMockSeriesSet([]storage.Series{s1, s2, s3}), +// expChks: newMockChunkSeriesSet([]storage.ChunkSeries{c1, c2, c3}), +// }, +// { +// // __name__= matcher, explode query and relabel +// mint: math.MinInt64, +// maxt: math.MaxInt64, +// ms: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "__name__", "foo.bar")}, +// exp: newMockSeriesSet([]storage.Series{s1, s2, s3Unescaped}), +// expChks: newMockChunkSeriesSet([]storage.ChunkSeries{c1, c2, c3Unescaped}), +// }, +// { +// // __name__= matcher plus other labels +// mint: math.MinInt64, +// maxt: math.MaxInt64, +// ms: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "__name__", "foo.bar"), labels.MustNewMatcher(labels.MatchNotEqual, "a", "b")}, +// exp: newMockSeriesSet([]storage.Series{s1, s3Unescaped}), +// expChks: newMockChunkSeriesSet([]storage.ChunkSeries{c1, c3Unescaped}), +// }, +// { +// // No need
to escape matcher, no-op +// mint: math.MinInt64, +// maxt: math.MaxInt64, +// ms: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "__name__", "foo_bar")}, +// exp: newMockSeriesSet([]storage.Series{s3}), +// expChks: newMockChunkSeriesSet([]storage.ChunkSeries{c3}), +// }, +// } { +// ir, cr, _, _ := createIdxChkReaders(t, mixedUTF8Data) +// q := &blockQuerier{ +// blockBaseQuerier: &blockBaseQuerier{ +// index: ir, +// chunks: cr, +// tombstones: tombstones.NewMemTombstones(), + +// mint: c.mint, +// maxt: c.maxt, +// }, +// } + +// mixedQ := &mixedUTF8BlockQuerier{ +// blockQuerier: q, +// es: model.UnderscoreEscaping, +// } + +// mixedChunkQ := &mixedUTF8BlockChunkQuerier{ +// blockChunkQuerier: &blockChunkQuerier{ +// blockBaseQuerier: &blockBaseQuerier{ +// index: ir, +// chunks: cr, +// tombstones: tombstones.NewMemTombstones(), +// mint: c.mint, +// maxt: c.maxt, +// }, +// }, +// es: model.UnderscoreEscaping, +// } +// testQueriersSelect(t, c, mixedQ, mixedChunkQ) +// } +// } + +// func TestMixedUTF8BlockQuerier_Labels(t *testing.T) { +// for _, c := range []struct { +// mint, maxt int64 +// ms []*labels.Matcher +// labelName string +// expLabelValues []string +// expLabelNames []string +// }{ +// { +// mint: math.MinInt64, +// maxt: math.MaxInt64, +// ms: []*labels.Matcher{}, +// labelName: "__name__", +// expLabelValues: []string{"another_metric", "baz.qux", "baz_qux", "foo.bar", "foo_bar"}, +// expLabelNames: []string{"", "__name__", "a", "c", "e"}, +// }, +// { +// mint: math.MinInt64, +// maxt: math.MaxInt64, +// ms: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "__name__", "foo.bar")}, +// labelName: "__name__", +// expLabelValues: []string{"foo.bar"}, +// expLabelNames: []string{"__name__", "a"}, +// }, +// { +// mint: math.MinInt64, +// maxt: math.MaxInt64, +// ms: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "__name__", "baz.qux")}, +// labelName: "e", +// expLabelValues: []string{"f"}, +// expLabelNames: 
[]string{"__name__", "c", "e"}, +// }, +// } { +// ir, cr, _, _ := createIdxChkReaders(t, mixedUTF8Data) +// q := &blockQuerier{ +// blockBaseQuerier: &blockBaseQuerier{ +// index: ir, +// chunks: cr, +// tombstones: tombstones.NewMemTombstones(), + +// mint: c.mint, +// maxt: c.maxt, +// }, +// } + +// mixedQ := &mixedUTF8BlockQuerier{ +// blockQuerier: q, +// es: model.UnderscoreEscaping, +// } + +// mixedChunkQ := &mixedUTF8BlockChunkQuerier{ +// blockChunkQuerier: &blockChunkQuerier{ +// blockBaseQuerier: &blockBaseQuerier{ +// index: ir, +// chunks: cr, +// tombstones: tombstones.NewMemTombstones(), +// mint: c.mint, +// maxt: c.maxt, +// }, +// }, +// es: model.UnderscoreEscaping, +// } +// t.Run("LabelValues", func(t *testing.T) { +// lv, _, err := mixedQ.LabelValues(context.Background(), c.labelName, nil, c.ms...) +// require.NoError(t, err) +// require.Equal(t, c.expLabelValues, lv) +// lv, _, err = mixedChunkQ.LabelValues(context.Background(), c.labelName, nil, c.ms...) +// require.NoError(t, err) +// require.Equal(t, c.expLabelValues, lv) +// }) + +// t.Run("LabelNames", func(t *testing.T) { +// ln, _, err := mixedQ.LabelNames(context.Background(), nil, c.ms...) +// require.NoError(t, err) +// require.Equal(t, c.expLabelNames, ln) +// ln, _, err = mixedChunkQ.LabelNames(context.Background(), nil, c.ms...) 
+// require.NoError(t, err) +// require.Equal(t, c.expLabelNames, ln) +// }) +// require.NoError(t, mixedQ.Close()) +// require.NoError(t, mixedChunkQ.Close()) +// } +// } + +// func TestEscapedUTF8BlockQuerier(t *testing.T) { +// for _, c := range []querierSelectTestCase{ +// { +// ms: []*labels.Matcher{}, +// exp: newMockSeriesSet([]storage.Series{}), +// expChks: newMockChunkSeriesSet([]storage.ChunkSeries{}), +// }, +// { +// mint: math.MinInt64, +// maxt: math.MaxInt64, +// ms: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "a", ".+")}, +// exp: newMockSeriesSet([]storage.Series{s3}), +// expChks: newMockChunkSeriesSet([]storage.ChunkSeries{c3}), +// }, +// } { +// ir, cr, _, _ := createIdxChkReaders(t, underscoreEscapedUTF8Data) +// q := &blockQuerier{ +// blockBaseQuerier: &blockBaseQuerier{ +// index: ir, +// chunks: cr, +// tombstones: tombstones.NewMemTombstones(), + +// mint: c.mint, +// maxt: c.maxt, +// }, +// } + +// escapedQ := &escapedUTF8BlockQuerier{ +// blockQuerier: q, +// es: model.UnderscoreEscaping, +// } + +// escapedChunkQ := &escapedUTF8BlockChunkQuerier{ +// blockChunkQuerier: &blockChunkQuerier{ +// blockBaseQuerier: &blockBaseQuerier{ +// index: ir, +// chunks: cr, +// tombstones: tombstones.NewMemTombstones(), +// mint: c.mint, +// maxt: c.maxt, +// }, +// }, +// es: model.UnderscoreEscaping, +// } +// testQueriersSelect(t, c, escapedQ, escapedChunkQ) +// } +// } diff --git a/tsdb/querier.go b/tsdb/querier.go index 489a23a80..912c95032 100644 --- a/tsdb/querier.go +++ b/tsdb/querier.go @@ -22,7 +22,6 @@ import ( "github.com/oklog/ulid" - "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" @@ -32,7 +31,6 @@ import ( "github.com/prometheus/prometheus/tsdb/index" "github.com/prometheus/prometheus/tsdb/tombstones" "github.com/prometheus/prometheus/util/annotations" - 
"github.com/prometheus/prometheus/util/strutil" ) // checkContextEveryNIterations is used in some tight loops to check if the context is done. @@ -1256,251 +1254,3 @@ func (cr nopChunkReader) ChunkOrIterable(chunks.Meta) (chunkenc.Chunk, chunkenc. } func (cr nopChunkReader) Close() error { return nil } - -type mixedUTF8BlockQuerier struct { - *blockQuerier - es model.EscapingScheme -} - -func (q *mixedUTF8BlockQuerier) LabelValues(ctx context.Context, name string, hints *storage.LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) { - vals, an, err := q.blockQuerier.LabelValues(ctx, name, hints, matchers...) - if err != nil { - return nil, nil, err - } - newMatchers, escaped, original, ok := escapeUTF8NameMatcher(matchers, q.es) - var vals2 []string - if ok { - vals2, _, err = q.blockQuerier.LabelValues(ctx, name, hints, newMatchers...) - if err == nil && name == model.MetricNameLabel { - for i := range vals2 { - if vals2[i] == escaped { - vals2[i] = original - } - } - } - vals = strutil.MergeStrings(vals, vals2) - } - if ix := slices.Index(vals, ""); ix != -1 && len(vals) > 1 { - vals = append(vals[:ix], vals[ix+1:]...) - } - return vals, an, err -} - -func (q *mixedUTF8BlockQuerier) LabelNames(ctx context.Context, hints *storage.LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) { - names, an, err := q.blockQuerier.LabelNames(ctx, hints, matchers...) - if err != nil { - return nil, nil, err - } - newMatchers, _, _, ok := escapeUTF8NameMatcher(matchers, q.es) - if ok { - names2, _, err := q.blockQuerier.LabelNames(ctx, hints, newMatchers...) 
- if err == nil { - names = strutil.MergeStrings(names, names2) - } - } - return names, an, err -} - -func (q *mixedUTF8BlockQuerier) Select(ctx context.Context, sortSeries bool, hints *storage.SelectHints, matchers ...*labels.Matcher) storage.SeriesSet { - newMatchers, escaped, original, ok := escapeUTF8NameMatcher(matchers, q.es) - - if !ok { - return q.blockQuerier.Select(ctx, sortSeries, hints, matchers...) - } - - // We need to sort for merge to work. - return storage.NewMergeSeriesSet([]storage.SeriesSet{ - q.blockQuerier.Select(ctx, true, hints, matchers...), - &metricRenameSeriesSet{SeriesSet: q.blockQuerier.Select(ctx, true, hints, newMatchers...), from: escaped, to: original}, - }, storage.ChainedSeriesMerge) -} - -type mixedUTF8BlockChunkQuerier struct { - *blockChunkQuerier - es model.EscapingScheme -} - -func (q *mixedUTF8BlockChunkQuerier) LabelValues(ctx context.Context, name string, hints *storage.LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) { - vals, an, err := q.blockChunkQuerier.LabelValues(ctx, name, hints, matchers...) - if err != nil { - return nil, nil, err - } - newMatchers, escaped, original, ok := escapeUTF8NameMatcher(matchers, q.es) - var vals2 []string - if ok { - vals2, _, err = q.blockChunkQuerier.LabelValues(ctx, name, hints, newMatchers...) - if err == nil && name == model.MetricNameLabel { - for i := range vals2 { - if vals2[i] == escaped { - vals2[i] = original - } - } - } - vals = strutil.MergeStrings(vals, vals2) - } - if ix := slices.Index(vals, ""); ix != -1 && len(vals) > 1 { - vals = append(vals[:ix], vals[ix+1:]...) - } - return vals, an, err -} - -func (q *mixedUTF8BlockChunkQuerier) LabelNames(ctx context.Context, hints *storage.LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) { - names, an, err := q.blockChunkQuerier.LabelNames(ctx, hints, matchers...) 
- if err != nil { - return nil, nil, err - } - newMatchers, _, _, ok := escapeUTF8NameMatcher(matchers, q.es) - if ok { - names2, _, err := q.blockChunkQuerier.LabelNames(ctx, hints, newMatchers...) - if err == nil { - names = strutil.MergeStrings(names, names2) - } - } - return names, an, err -} - -func (q *mixedUTF8BlockChunkQuerier) Select(ctx context.Context, sortSeries bool, hints *storage.SelectHints, matchers ...*labels.Matcher) storage.ChunkSeriesSet { - newMatchers, escaped, original, ok := escapeUTF8NameMatcher(matchers, q.es) - - if !ok { - return q.blockChunkQuerier.Select(ctx, sortSeries, hints, matchers...) - } - - // We need to sort for merge to work. - return storage.NewMergeChunkSeriesSet([]storage.ChunkSeriesSet{ - q.blockChunkQuerier.Select(ctx, true, hints, matchers...), - &metricRenameChunkSeriesSet{ChunkSeriesSet: q.blockChunkQuerier.Select(ctx, true, hints, newMatchers...), from: escaped, to: original}, - }, storage.NewCompactingChunkSeriesMerger(storage.ChainedSeriesMerge)) -} - -type escapedUTF8BlockQuerier struct { - *blockQuerier - es model.EscapingScheme -} - -func (q *escapedUTF8BlockQuerier) LabelValues(ctx context.Context, name string, hints *storage.LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) { - panic("not implemented") -} - -func (q *escapedUTF8BlockQuerier) LabelNames(ctx context.Context, hints *storage.LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) { - panic("not implemented") -} - -func (q *escapedUTF8BlockQuerier) Select(ctx context.Context, sortSeries bool, hints *storage.SelectHints, matchers ...*labels.Matcher) storage.SeriesSet { - newMatchers, escaped, original, ok := escapeUTF8NameMatcher(matchers, q.es) - if !ok { - return q.blockQuerier.Select(ctx, sortSeries, hints, matchers...) 
- } - return &metricRenameSeriesSet{SeriesSet: q.blockQuerier.Select(ctx, sortSeries, hints, newMatchers...), from: escaped, to: original} -} - -func escapeUTF8NameMatcher(matchers []*labels.Matcher, es model.EscapingScheme) (newMatchers []*labels.Matcher, escaped, original string, ok bool) { - // TODO: avoid allocation if there is nothing to escape? - newMatchers = make([]*labels.Matcher, len(matchers)) - - for i, m := range matchers { - m2 := *m - if m.Type == labels.MatchEqual && m.Name == model.MetricNameLabel && !model.IsValidLegacyMetricName(m.Value) { - // TODO: what if we get multiple and different __name__ matchers? - // Leaning towards ignoring everything and querying the underlying querier as is. Results will and should be empty. - original = m.Value - m2.Value = model.EscapeName(m.Value, es) - escaped = m2.Value - ok = true - } - newMatchers[i] = &m2 - } - return -} - -type escapedUTF8BlockChunkQuerier struct { - *blockChunkQuerier - es model.EscapingScheme -} - -func (q *escapedUTF8BlockChunkQuerier) LabelValues(ctx context.Context, name string, hints *storage.LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) { - panic("not implemented") -} - -func (q *escapedUTF8BlockChunkQuerier) LabelNames(ctx context.Context, hints *storage.LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) { - newMatchers, _, _, ok := escapeUTF8NameMatcher(matchers, q.es) - if ok { - matchers = newMatchers - } - return q.blockChunkQuerier.LabelNames(ctx, hints, matchers...) -} - -func (q *escapedUTF8BlockChunkQuerier) Select(ctx context.Context, sortSeries bool, hints *storage.SelectHints, matchers ...*labels.Matcher) storage.ChunkSeriesSet { - newMatchers, escaped, original, ok := escapeUTF8NameMatcher(matchers, q.es) - if !ok { - return q.blockChunkQuerier.Select(ctx, sortSeries, hints, matchers...) 
- } - return &metricRenameChunkSeriesSet{ChunkSeriesSet: q.blockChunkQuerier.Select(ctx, sortSeries, hints, - newMatchers...), from: escaped, to: original} -} - -type metricRenameSeriesSet struct { - storage.SeriesSet - from, to string -} - -func (u *metricRenameSeriesSet) At() storage.Series { - lbls := labels.NewScratchBuilder(u.SeriesSet.At().Labels().Len()) - u.SeriesSet.At().Labels().Range(func(l labels.Label) { - // TODO: what if we don't find the label we need to map? That would be - // an important bug, because that would break our assumptions that keep - // the series sorted. Panic? Return Next=false and Err= not nil? - if l.Name == model.MetricNameLabel && l.Value == u.from { - lbls.Add(l.Name, u.to) - } else { - lbls.Add(l.Name, l.Value) - } - }) - - return &storage.SeriesEntry{ - Lset: lbls.Labels(), - SampleIteratorFn: func(it chunkenc.Iterator) chunkenc.Iterator { - return u.SeriesSet.At().Iterator(it) - }, - } -} - -func (u *metricRenameSeriesSet) Warnings() annotations.Annotations { - // Warnings are for the whole set, so no sorting needed. However: - // TODO: can a warning be referencing a metric name? Would that be a problem? I think not, but would be confusing. - // TODO: should we add a warning about the renaming? - return u.SeriesSet.Warnings() -} - -type metricRenameChunkSeriesSet struct { - storage.ChunkSeriesSet - from, to string -} - -func (u *metricRenameChunkSeriesSet) At() storage.ChunkSeries { - lbls := labels.NewScratchBuilder(u.ChunkSeriesSet.At().Labels().Len()) - u.ChunkSeriesSet.At().Labels().Range(func(l labels.Label) { - // TODO: what if we don't find the label we need to map? That would be - // an important bug, because that would break our assumptions that keep - // the series sorted. Panic? Return Next=false and Err= not nil? 
- if l.Name == model.MetricNameLabel && l.Value == u.from { - lbls.Add(l.Name, u.to) - } else { - lbls.Add(l.Name, l.Value) - } - }) - - return &storage.ChunkSeriesEntry{ - Lset: lbls.Labels(), - ChunkIteratorFn: func(it chunks.Iterator) chunks.Iterator { - return u.ChunkSeriesSet.At().Iterator(it) - }, - } -} - -func (u *metricRenameChunkSeriesSet) Warnings() annotations.Annotations { - // Warnings are for the whole set, so no sorting needed. However: - // TODO: can a warning be referencing a metric name? Would that be a problem? I think not, but would be confusing. - // TODO: should we add a warning about the renaming? - return u.ChunkSeriesSet.Warnings() -} diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index 6d39d8932..2290e76f2 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -30,8 +30,6 @@ import ( "github.com/oklog/ulid" "github.com/stretchr/testify/require" - "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" @@ -421,290 +419,6 @@ func TestBlockQuerier(t *testing.T) { } } -var utf8Data = []seriesSamples{ - { - lset: map[string]string{"a": "a", "__name__": "foo.bar"}, - chunks: [][]sample{ - {{1, 2, nil, nil}, {2, 3, nil, nil}, {3, 4, nil, nil}}, - {{5, 2, nil, nil}, {6, 3, nil, nil}, {7, 4, nil, nil}}, - }, - }, - { - lset: map[string]string{"a": "b", "__name__": "foo.bar"}, - chunks: [][]sample{ - {{1, 1, nil, nil}, {2, 2, nil, nil}, {3, 3, nil, nil}}, - {{5, 3, nil, nil}, {6, 6, nil, nil}}, - }, - }, - { - lset: map[string]string{"c": "d", "__name__": "baz.qux"}, - chunks: [][]sample{ - {{1, 1, nil, nil}, {2, 2, nil, nil}, {3, 3, nil, nil}}, - {{5, 3, nil, nil}, {6, 6, nil, nil}}, - }, - }, -} - -var underscoreEscapedUTF8Data = []seriesSamples{ - { - lset: map[string]string{"a": "c", "__name__": "foo_bar"}, - chunks: [][]sample{ - {{1, 3, nil, nil}, {2, 2, nil, nil}, {3, 6, nil, nil}}, - {{5, 1, nil, 
nil}, {6, 7, nil, nil}, {7, 2, nil, nil}}, - }, - }, - { - lset: map[string]string{"e": "f", "__name__": "baz_qux"}, - chunks: [][]sample{ - {{1, 3, nil, nil}, {2, 2, nil, nil}, {3, 6, nil, nil}}, - {{5, 1, nil, nil}, {6, 7, nil, nil}, {7, 2, nil, nil}}, - }, - }, - { - lset: map[string]string{"__name__": "another_metric"}, - chunks: [][]sample{ - {{1, 41, nil, nil}, {2, 42, nil, nil}, {3, 43, nil, nil}}, - {{5, 45, nil, nil}, {6, 46, nil, nil}, {7, 47, nil, nil}}, - }, - }, -} - -var mixedUTF8Data = append(utf8Data, underscoreEscapedUTF8Data...) - -var s1 = storage.NewListSeries(labels.FromStrings("a", "a", "__name__", "foo.bar"), - []chunks.Sample{sample{1, 2, nil, nil}, sample{2, 3, nil, nil}, sample{3, 4, nil, nil}, sample{5, 2, nil, nil}, sample{6, 3, nil, nil}, sample{7, 4, nil, nil}}, -) -var c1 = storage.NewListChunkSeriesFromSamples(labels.FromStrings("a", "a", "__name__", "foo.bar"), - []chunks.Sample{sample{1, 2, nil, nil}, sample{2, 3, nil, nil}, sample{3, 4, nil, nil}}, []chunks.Sample{sample{5, 2, nil, nil}, sample{6, 3, nil, nil}, sample{7, 4, nil, nil}}, -) -var s2 = storage.NewListSeries(labels.FromStrings("a", "b", "__name__", "foo.bar"), - []chunks.Sample{sample{1, 1, nil, nil}, sample{2, 2, nil, nil}, sample{3, 3, nil, nil}, sample{5, 3, nil, nil}, sample{6, 6, nil, nil}}, -) -var c2 = storage.NewListChunkSeriesFromSamples(labels.FromStrings("a", "b", "__name__", "foo.bar"), - []chunks.Sample{sample{1, 1, nil, nil}, sample{2, 2, nil, nil}, sample{3, 3, nil, nil}}, []chunks.Sample{sample{5, 3, nil, nil}, sample{6, 6, nil, nil}}, -) - -var s3 = storage.NewListSeries(labels.FromStrings("a", "c", "__name__", "foo_bar"), - []chunks.Sample{sample{1, 3, nil, nil}, sample{2, 2, nil, nil}, sample{3, 6, nil, nil}, sample{5, 1, nil, nil}, sample{6, 7, nil, nil}, sample{7, 2, nil, nil}}, -) -var c3 = storage.NewListChunkSeriesFromSamples(labels.FromStrings("a", "c", "__name__", "foo_bar"), - []chunks.Sample{sample{1, 3, nil, nil}, sample{2, 2, nil, nil}, 
sample{3, 6, nil, nil}}, []chunks.Sample{sample{5, 1, nil, nil}, sample{6, 7, nil, nil}, sample{7, 2, nil, nil}}, -) -var s3Unescaped = storage.NewListSeries(labels.FromStrings("a", "c", "__name__", "foo.bar"), - []chunks.Sample{sample{1, 3, nil, nil}, sample{2, 2, nil, nil}, sample{3, 6, nil, nil}, sample{5, 1, nil, nil}, sample{6, 7, nil, nil}, sample{7, 2, nil, nil}}, -) -var c3Unescaped = storage.NewListChunkSeriesFromSamples(labels.FromStrings("a", "c", "__name__", "foo.bar"), - []chunks.Sample{sample{1, 3, nil, nil}, sample{2, 2, nil, nil}, sample{3, 6, nil, nil}}, []chunks.Sample{sample{5, 1, nil, nil}, sample{6, 7, nil, nil}, sample{7, 2, nil, nil}}, -) - -func TestMixedUTF8BlockQuerier_Select(t *testing.T) { - // TODO(npazosmendez): test cases - // * same label set is combines and samples are returned in order - - for _, c := range []querierSelectTestCase{ - { - ms: []*labels.Matcher{}, - exp: newMockSeriesSet([]storage.Series{}), - expChks: newMockChunkSeriesSet([]storage.ChunkSeries{}), - }, - { - // No __name__= matcher, no-op - mint: math.MinInt64, - maxt: math.MaxInt64, - ms: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "a", ".+")}, - exp: newMockSeriesSet([]storage.Series{s1, s2, s3}), - expChks: newMockChunkSeriesSet([]storage.ChunkSeries{c1, c2, c3}), - }, - { - // __name__= matcher, explode query and relabel - mint: math.MinInt64, - maxt: math.MaxInt64, - ms: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "__name__", "foo.bar")}, - exp: newMockSeriesSet([]storage.Series{s1, s2, s3Unescaped}), - expChks: newMockChunkSeriesSet([]storage.ChunkSeries{c1, c2, c3Unescaped}), - }, - { - // __name__= matcher plus other labels - mint: math.MinInt64, - maxt: math.MaxInt64, - ms: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "__name__", "foo.bar"), labels.MustNewMatcher(labels.MatchNotEqual, "a", "b")}, - exp: newMockSeriesSet([]storage.Series{s1, s3Unescaped}), - expChks: 
newMockChunkSeriesSet([]storage.ChunkSeries{c1, c3Unescaped}), - }, - { - // No need to escape matcher, no-op - mint: math.MinInt64, - maxt: math.MaxInt64, - ms: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "__name__", "foo_bar")}, - exp: newMockSeriesSet([]storage.Series{s3}), - expChks: newMockChunkSeriesSet([]storage.ChunkSeries{c3}), - }, - } { - ir, cr, _, _ := createIdxChkReaders(t, mixedUTF8Data) - q := &blockQuerier{ - blockBaseQuerier: &blockBaseQuerier{ - index: ir, - chunks: cr, - tombstones: tombstones.NewMemTombstones(), - - mint: c.mint, - maxt: c.maxt, - }, - } - - mixedQ := &mixedUTF8BlockQuerier{ - blockQuerier: q, - es: model.UnderscoreEscaping, - } - - mixedChunkQ := &mixedUTF8BlockChunkQuerier{ - blockChunkQuerier: &blockChunkQuerier{ - blockBaseQuerier: &blockBaseQuerier{ - index: ir, - chunks: cr, - tombstones: tombstones.NewMemTombstones(), - mint: c.mint, - maxt: c.maxt, - }, - }, - es: model.UnderscoreEscaping, - } - testQueriersSelect(t, c, mixedQ, mixedChunkQ) - } -} - -func TestMixedUTF8BlockQuerier_Labels(t *testing.T) { - for _, c := range []struct { - mint, maxt int64 - ms []*labels.Matcher - labelName string - expLabelValues []string - expLabelNames []string - }{ - { - mint: math.MinInt64, - maxt: math.MaxInt64, - ms: []*labels.Matcher{}, - labelName: "__name__", - expLabelValues: []string{"another_metric", "baz.qux", "baz_qux", "foo.bar", "foo_bar"}, - expLabelNames: []string{"", "__name__", "a", "c", "e"}, - }, - { - mint: math.MinInt64, - maxt: math.MaxInt64, - ms: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "__name__", "foo.bar")}, - labelName: "__name__", - expLabelValues: []string{"foo.bar"}, - expLabelNames: []string{"__name__", "a"}, - }, - { - mint: math.MinInt64, - maxt: math.MaxInt64, - ms: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "__name__", "baz.qux")}, - labelName: "e", - expLabelValues: []string{"f"}, - expLabelNames: []string{"__name__", "c", "e"}, - }, - } { - ir, cr, _, 
_ := createIdxChkReaders(t, mixedUTF8Data) - q := &blockQuerier{ - blockBaseQuerier: &blockBaseQuerier{ - index: ir, - chunks: cr, - tombstones: tombstones.NewMemTombstones(), - - mint: c.mint, - maxt: c.maxt, - }, - } - - mixedQ := &mixedUTF8BlockQuerier{ - blockQuerier: q, - es: model.UnderscoreEscaping, - } - - mixedChunkQ := &mixedUTF8BlockChunkQuerier{ - blockChunkQuerier: &blockChunkQuerier{ - blockBaseQuerier: &blockBaseQuerier{ - index: ir, - chunks: cr, - tombstones: tombstones.NewMemTombstones(), - mint: c.mint, - maxt: c.maxt, - }, - }, - es: model.UnderscoreEscaping, - } - t.Run("LabelValues", func(t *testing.T) { - lv, _, err := mixedQ.LabelValues(context.Background(), c.labelName, nil, c.ms...) - require.NoError(t, err) - require.Equal(t, c.expLabelValues, lv) - lv, _, err = mixedChunkQ.LabelValues(context.Background(), c.labelName, nil, c.ms...) - require.NoError(t, err) - require.Equal(t, c.expLabelValues, lv) - }) - - t.Run("LabelNames", func(t *testing.T) { - ln, _, err := mixedQ.LabelNames(context.Background(), nil, c.ms...) - require.NoError(t, err) - require.Equal(t, c.expLabelNames, ln) - ln, _, err = mixedChunkQ.LabelNames(context.Background(), nil, c.ms...) 
- require.NoError(t, err) - require.Equal(t, c.expLabelNames, ln) - }) - require.NoError(t, mixedQ.Close()) - require.NoError(t, mixedChunkQ.Close()) - } -} - -func TestEscapedUTF8BlockQuerier(t *testing.T) { - for _, c := range []querierSelectTestCase{ - { - ms: []*labels.Matcher{}, - exp: newMockSeriesSet([]storage.Series{}), - expChks: newMockChunkSeriesSet([]storage.ChunkSeries{}), - }, - { - mint: math.MinInt64, - maxt: math.MaxInt64, - ms: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "a", ".+")}, - exp: newMockSeriesSet([]storage.Series{s3}), - expChks: newMockChunkSeriesSet([]storage.ChunkSeries{c3}), - }, - } { - ir, cr, _, _ := createIdxChkReaders(t, underscoreEscapedUTF8Data) - q := &blockQuerier{ - blockBaseQuerier: &blockBaseQuerier{ - index: ir, - chunks: cr, - tombstones: tombstones.NewMemTombstones(), - - mint: c.mint, - maxt: c.maxt, - }, - } - - escapedQ := &escapedUTF8BlockQuerier{ - blockQuerier: q, - es: model.UnderscoreEscaping, - } - - escapedChunkQ := &escapedUTF8BlockChunkQuerier{ - blockChunkQuerier: &blockChunkQuerier{ - blockBaseQuerier: &blockBaseQuerier{ - index: ir, - chunks: cr, - tombstones: tombstones.NewMemTombstones(), - mint: c.mint, - maxt: c.maxt, - }, - }, - es: model.UnderscoreEscaping, - } - testQueriersSelect(t, c, escapedQ, escapedChunkQ) - } -} - func TestBlockQuerier_AgainstHeadWithOpenChunks(t *testing.T) { for _, c := range []querierSelectTestCase{ {