mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-12 06:17:27 -08:00
579331446a
* Add benchmark for FastRegexMatcher Signed-off-by: Bryan Boreham <bjboreham@gmail.com> * Use modified regexp package with optimisations See https://github.com/grafana/regexp/tree/speedup#readme Includes the following changes proposed upstream: * [regexp: allow patterns with no alternates to be one-pass](https://go-review.googlesource.com/c/go/+/353711) * [regexp: speed up onepass prefix check](https://go-review.googlesource.com/c/go/+/354909) * [regexp: handle prefix string with fold-case](https://go-review.googlesource.com/c/go/+/358756) * [regexp: avoid copying each instruction executed](https://go-review.googlesource.com/c/go/+/355789) * [regexp: allow prefix string anchored at beginning](https://go-review.googlesource.com/c/go/+/377294) Signed-off-by: Bryan Boreham <bjboreham@gmail.com> * Use regexp code identical to upstream Go Change `grafana/regexp` import to use `main` branch. This means Prometheus is not using the proposed optimisations, but downstream users of Prometheus code are able to `replace` the library with the `speedup` branch which does. Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
109 lines
2.8 KiB
Go
109 lines
2.8 KiB
Go
// Copyright 2020 The Prometheus Authors
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package labels
|
|
|
|
import (
|
|
"strings"
|
|
|
|
"github.com/grafana/regexp"
|
|
"github.com/grafana/regexp/syntax"
|
|
)
|
|
|
|
type FastRegexMatcher struct {
|
|
re *regexp.Regexp
|
|
prefix string
|
|
suffix string
|
|
contains string
|
|
}
|
|
|
|
func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
|
|
re, err := regexp.Compile("^(?:" + v + ")$")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
parsed, err := syntax.Parse(v, syntax.Perl)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
m := &FastRegexMatcher{
|
|
re: re,
|
|
}
|
|
|
|
if parsed.Op == syntax.OpConcat {
|
|
m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed)
|
|
}
|
|
|
|
return m, nil
|
|
}
|
|
|
|
func (m *FastRegexMatcher) MatchString(s string) bool {
|
|
if m.prefix != "" && !strings.HasPrefix(s, m.prefix) {
|
|
return false
|
|
}
|
|
if m.suffix != "" && !strings.HasSuffix(s, m.suffix) {
|
|
return false
|
|
}
|
|
if m.contains != "" && !strings.Contains(s, m.contains) {
|
|
return false
|
|
}
|
|
return m.re.MatchString(s)
|
|
}
|
|
|
|
func (m *FastRegexMatcher) GetRegexString() string {
|
|
return m.re.String()
|
|
}
|
|
|
|
// optimizeConcatRegex returns literal prefix/suffix text that can be safely
|
|
// checked against the label value before running the regexp matcher.
|
|
func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix, contains string) {
|
|
sub := r.Sub
|
|
|
|
// We can safely remove begin and end text matchers respectively
|
|
// at the beginning and end of the regexp.
|
|
if len(sub) > 0 && sub[0].Op == syntax.OpBeginText {
|
|
sub = sub[1:]
|
|
}
|
|
if len(sub) > 0 && sub[len(sub)-1].Op == syntax.OpEndText {
|
|
sub = sub[:len(sub)-1]
|
|
}
|
|
|
|
if len(sub) == 0 {
|
|
return
|
|
}
|
|
|
|
// Given Prometheus regex matchers are always anchored to the begin/end
|
|
// of the text, if the first/last operations are literals, we can safely
|
|
// treat them as prefix/suffix.
|
|
if sub[0].Op == syntax.OpLiteral && (sub[0].Flags&syntax.FoldCase) == 0 {
|
|
prefix = string(sub[0].Rune)
|
|
}
|
|
if last := len(sub) - 1; sub[last].Op == syntax.OpLiteral && (sub[last].Flags&syntax.FoldCase) == 0 {
|
|
suffix = string(sub[last].Rune)
|
|
}
|
|
|
|
// If contains any literal which is not a prefix/suffix, we keep the
|
|
// 1st one. We do not keep the whole list of literals to simplify the
|
|
// fast path.
|
|
for i := 1; i < len(sub)-1; i++ {
|
|
if sub[i].Op == syntax.OpLiteral && (sub[i].Flags&syntax.FoldCase) == 0 {
|
|
contains = string(sub[i].Rune)
|
|
break
|
|
}
|
|
}
|
|
|
|
return
|
|
}
|