mirror of
https://github.com/prometheus/prometheus.git
synced 2025-03-05 20:59:13 -08:00
Fix FastRegexMatcher matching multibyte runes with . (#14059)
When `zeroOrOneCharacterStringMatcher` wach checking the input string, it assumed that if there are more than one bytes, then there are more than one runes, but that's not necessarily true. Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
This commit is contained in:
parent
4b7a44c7a0
commit
2524a91591
|
@ -16,6 +16,7 @@ package labels
|
||||||
import (
|
import (
|
||||||
"slices"
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/grafana/regexp"
|
"github.com/grafana/regexp"
|
||||||
"github.com/grafana/regexp/syntax"
|
"github.com/grafana/regexp/syntax"
|
||||||
|
@ -827,8 +828,7 @@ type zeroOrOneCharacterStringMatcher struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *zeroOrOneCharacterStringMatcher) Matches(s string) bool {
|
func (m *zeroOrOneCharacterStringMatcher) Matches(s string) bool {
|
||||||
// Zero or one.
|
if moreThanOneRune(s) {
|
||||||
if len(s) > 1 {
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -840,6 +840,27 @@ func (m *zeroOrOneCharacterStringMatcher) Matches(s string) bool {
|
||||||
return s[0] != '\n'
|
return s[0] != '\n'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// moreThanOneRune returns true if there are more than one runes in the string.
|
||||||
|
// It doesn't check whether the string is valid UTF-8.
|
||||||
|
// The return value should be always equal to utf8.RuneCountInString(s) > 1,
|
||||||
|
// but the function is optimized for the common case where the string prefix is ASCII.
|
||||||
|
func moreThanOneRune(s string) bool {
|
||||||
|
// If len(s) is exactly one or zero, there can't be more than one rune.
|
||||||
|
// Exit through this path quickly.
|
||||||
|
if len(s) <= 1 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// There's one or more bytes:
|
||||||
|
// If first byte is ASCII then there are multiple runes if there are more bytes after that.
|
||||||
|
if s[0] < utf8.RuneSelf {
|
||||||
|
return len(s) > 1
|
||||||
|
}
|
||||||
|
|
||||||
|
// Less common case: first is a multibyte rune.
|
||||||
|
return utf8.RuneCountInString(s) > 1
|
||||||
|
}
|
||||||
|
|
||||||
// trueMatcher is a stringMatcher which matches any string (always returns true).
|
// trueMatcher is a stringMatcher which matches any string (always returns true).
|
||||||
type trueMatcher struct{}
|
type trueMatcher struct{}
|
||||||
|
|
||||||
|
|
|
@ -84,7 +84,7 @@ var (
|
||||||
"foo", " foo bar", "bar", "buzz\nbar", "bar foo", "bfoo", "\n", "\nfoo", "foo\n", "hello foo world", "hello foo\n world", "",
|
"foo", " foo bar", "bar", "buzz\nbar", "bar foo", "bfoo", "\n", "\nfoo", "foo\n", "hello foo world", "hello foo\n world", "",
|
||||||
"FOO", "Foo", "OO", "Oo", "\nfoo\n", strings.Repeat("f", 20), "prometheus", "prometheus_api_v1", "prometheus_api_v1_foo",
|
"FOO", "Foo", "OO", "Oo", "\nfoo\n", strings.Repeat("f", 20), "prometheus", "prometheus_api_v1", "prometheus_api_v1_foo",
|
||||||
"10.0.1.20", "10.0.2.10", "10.0.3.30", "10.0.4.40",
|
"10.0.1.20", "10.0.2.10", "10.0.3.30", "10.0.4.40",
|
||||||
"foofoo0", "foofoo",
|
"foofoo0", "foofoo", "😀foo0",
|
||||||
|
|
||||||
// Values matching / not matching the test regexps on long alternations.
|
// Values matching / not matching the test regexps on long alternations.
|
||||||
"zQPbMkNO", "zQPbMkNo", "jyyfj00j0061", "jyyfj00j006", "jyyfj00j00612", "NNSPdvMi", "NNSPdvMiXXX", "NNSPdvMixxx", "nnSPdvMi", "nnSPdvMiXXX",
|
"zQPbMkNO", "zQPbMkNo", "jyyfj00j0061", "jyyfj00j006", "jyyfj00j00612", "NNSPdvMi", "NNSPdvMiXXX", "NNSPdvMixxx", "nnSPdvMi", "nnSPdvMiXXX",
|
||||||
|
|
Loading…
Reference in a new issue