notes and tests

This commit is contained in:
Nicolás Pazos 2024-09-11 14:06:57 -03:00
parent 5965843bcd
commit 5590b33710
2 changed files with 75 additions and 12 deletions

View file

@ -7675,4 +7675,53 @@ func TestUTF8(t *testing.T) {
require.Equal(t, map[string][]chunks.Sample{
labels.FromStrings("__name__", "with.dots").String(): {sample{t: 100, f: 1.0}, sample{t: 200, f: 2.0}},
}, query(t, q, labels.MustNewMatcher(labels.MatchEqual, "__name__", "with.dots")))
q, err = db.Querier(math.MinInt, math.MaxInt64)
require.NoError(t, err)
require.Equal(t, map[string][]chunks.Sample{
labels.FromStrings("__name__", "with_dots").String(): {sample{t: 200, f: 2.0}},
}, query(t, q, labels.MustNewMatcher(labels.MatchEqual, "__name__", "with_dots")))
app = db.Appender(context.Background())
_, err = app.Append(0, labels.FromStrings("__name__", "foob_r"), 300, 3.0)
require.NoError(t, err)
_, err = app.Append(0, labels.FromStrings("__name__", "foobár"), 400, 4.0)
require.NoError(t, err)
require.NoError(t, app.Commit())
q, err = db.Querier(math.MinInt, math.MaxInt64)
require.NoError(t, err)
require.Equal(t, map[string][]chunks.Sample{
labels.FromStrings("__name__", "foobár").String(): {sample{t: 300, f: 3.0}, sample{t: 400, f: 4.0}},
}, query(t, q, labels.MustNewMatcher(labels.MatchEqual, "__name__", "foobár")))
app = db.Appender(context.Background())
_, err = app.Append(0, labels.FromStrings("__name__", "@bazbár"), 300, 4.0)
require.NoError(t, err)
_, err = app.Append(0, labels.FromStrings("__name__", "_bazb_r"), 300, 3.0)
require.NoError(t, err)
require.NoError(t, app.Commit())
require.NoError(t, db.CompactHead(NewRangeHead(db.Head(), 0, 1000)))
for _ = range 10000 {
q, err = db.Querier(math.MinInt, math.MaxInt64)
require.NoError(t, err)
require.Equal(t, map[string][]chunks.Sample{
labels.FromStrings("__name__", "@bazbár").String(): {sample{t: 300, f: 3.0}},
}, query(t, q, labels.MustNewMatcher(labels.MatchEqual, "__name__", "@bazbár")))
}
// Test cases and notes:
// - "OOO or repeated timestamps" with different labels that are the same after escaping. Not sure if that can be a problem, maybe non-determinism?
// - should we make compaction ensure that blocks don't have mixed (escaped and unescaped) labels?
// - some labels are escaped but some not, then series are not the same
// - what if labels are not explicitly mentioned in the query, but still queried? e.g.
// foo.bar{"a.b"="c"}
// foo_bar{"a_b"="b"}
// and I query for __name__="a.b"
// maybe only do it for metric name?
// Implementation questions:
// - could the order of metrics mess things up? should I sort something?
}

View file

@ -105,6 +105,7 @@ func (q *blockBaseQuerier) Close() error {
// utf8MixedSeries wraps a storage.Series and carries the name mappings that
// Labels() consults when reporting the metric name.
type utf8MixedSeries struct {
// s is the underlying series being wrapped.
s storage.Series
// mappings is looked up by the stored __name__ value in Labels(); the mapped
// value is reported instead, and the stored value is kept when the lookup
// yields an empty string.
mappings map[string]string
}
func (u utf8MixedSeries) Iterator(it chunkenc.Iterator) chunkenc.Iterator {
@ -117,7 +118,10 @@ func (u utf8MixedSeries) Labels() labels.Labels {
var n, v string
if l.Name == "__name__" {
n = l.Name
v = strings.ReplaceAll(l.Value, "_", ".")
v = u.mappings[l.Value]
if v == "" {
v = l.Value
}
} else {
n = strings.ReplaceAll(l.Name, "_", ".")
v = l.Value
@ -129,10 +133,11 @@ func (u utf8MixedSeries) Labels() labels.Labels {
// utf8MixedSeriesSet wraps a storage.SeriesSet and carries a mappings table
// alongside it for the series it yields.
type utf8MixedSeriesSet struct {
// ss is the wrapped series set; Warnings() is forwarded to it.
ss storage.SeriesSet
// mappings is the table handed to NewUTF8MixedSeriesSet by its caller.
// NOTE(review): presumably escaped name -> original name as built in Select — confirm.
mappings map[string]string
}
// At returns the wrapped set's current series, wrapped as a utf8MixedSeries.
// NOTE(review): the two consecutive return statements below look like the
// before/after lines of a diff hunk rendered without +/- markers. Only the
// second one forwards u.mappings to the returned series — confirm against the
// real file which one is live.
func (u *utf8MixedSeriesSet) At() storage.Series {
return utf8MixedSeries{s: u.ss.At()}
return utf8MixedSeries{s: u.ss.At(), mappings: u.mappings}
}
func (u *utf8MixedSeriesSet) Err() error {
@ -147,8 +152,8 @@ func (u *utf8MixedSeriesSet) Warnings() annotations.Annotations {
return u.ss.Warnings()
}
// NewUTF8MixedSeriesSet returns a storage.SeriesSet that wraps ss in a
// utf8MixedSeriesSet.
// NOTE(review): two signatures and two return statements appear below, which
// looks like the before/after lines of a diff hunk rendered without +/-
// markers. The two-argument form is the one that stores mappings on the
// wrapper — confirm against the real file.
func NewUTF8MixedSeriesSet(ss storage.SeriesSet) storage.SeriesSet {
return &utf8MixedSeriesSet{ss: ss}
func NewUTF8MixedSeriesSet(ss storage.SeriesSet, mappings map[string]string) storage.SeriesSet {
return &utf8MixedSeriesSet{ss: ss, mappings: mappings}
}
type utf8MixedQuerier struct {
@ -171,17 +176,23 @@ func (u *utf8MixedQuerier) LabelValues(ctx context.Context, name string, hints *
func (u *utf8MixedQuerier) Select(ctx context.Context, sortSeries bool, hints *storage.SelectHints, matchers ...*labels.Matcher) storage.SeriesSet {
ms2 := make([]*labels.Matcher, 0, len(matchers))
change := false
mappings := map[string]string{}
for i, m := range matchers {
ms2 = append(ms2, &labels.Matcher{})
if m.Type == labels.MatchEqual {
ms2[i].Name = strings.ReplaceAll(m.Name, ".", "_")
if ms2[i].Name == "__name__" {
ms2[i].Value = strings.ReplaceAll(m.Value, ".", "_")
ms2[i].Name = model.EscapeName(m.Name, u.es)
if ms2[i].Name == model.MetricNameLabel {
ms2[i].Value = model.EscapeName(m.Value, u.es)
} else {
ms2[i].Value = m.Value
}
if m.Name != ms2[i].Name || m.Value != ms2[i].Value {
if m.Name != ms2[i].Name {
change = true
mappings[ms2[i].Name] = m.Name
}
if m.Value != ms2[i].Value {
change = true
mappings[ms2[i].Value] = m.Value
}
} else {
ms2[i] = m
@ -189,10 +200,13 @@ func (u *utf8MixedQuerier) Select(ctx context.Context, sortSeries bool, hints *s
ms2[i].Type = m.Type
}
sets := []storage.SeriesSet{
u.q.Select(ctx, sortSeries, hints, matchers...),
// We need to sort for merge to work.
// TODO: maybe only pass true if we indeed have to merge
u.q.Select(ctx, true, hints, matchers...),
}
if change {
sets = append(sets, NewUTF8MixedSeriesSet(u.q.Select(ctx, sortSeries, hints, ms2...)))
// TODO: maybe utf8MixedSeriesSet should always sort afterwards, so there's no need to sort the underlying query?
sets = append(sets, NewUTF8MixedSeriesSet(u.q.Select(ctx, true, hints, ms2...), mappings))
}
return storage.NewMergeSeriesSet(sets, storage.ChainedSeriesMerge)
}