2020-06-26 02:49:09 -07:00
|
|
|
// Copyright 2020 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package labels
|
|
|
|
|
|
|
|
import (
|
2024-01-25 01:40:57 -08:00
|
|
|
"fmt"
|
|
|
|
"math/rand"
|
2022-02-08 02:03:20 -08:00
|
|
|
"strings"
|
2020-06-26 02:49:09 -07:00
|
|
|
"testing"
|
2024-01-25 01:40:57 -08:00
|
|
|
"time"
|
2024-05-13 06:36:55 -07:00
|
|
|
"unicode/utf8"
|
2020-06-26 02:49:09 -07:00
|
|
|
|
2024-01-25 01:40:57 -08:00
|
|
|
"github.com/grafana/regexp"
|
2022-02-08 02:03:20 -08:00
|
|
|
"github.com/grafana/regexp/syntax"
|
2020-10-29 02:43:23 -07:00
|
|
|
"github.com/stretchr/testify/require"
|
2020-06-26 02:49:09 -07:00
|
|
|
)
|
|
|
|
|
2024-01-25 01:40:57 -08:00
|
|
|
var (
|
|
|
|
asciiRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_")
|
|
|
|
regexes = []string{
|
|
|
|
"",
|
|
|
|
"foo",
|
|
|
|
"^foo",
|
|
|
|
"(foo|bar)",
|
|
|
|
"foo.*",
|
|
|
|
".*foo",
|
|
|
|
"^.*foo$",
|
|
|
|
"^.+foo$",
|
2024-05-13 08:05:31 -07:00
|
|
|
".?",
|
2024-01-25 01:40:57 -08:00
|
|
|
".*",
|
|
|
|
".+",
|
|
|
|
"foo.+",
|
|
|
|
".+foo",
|
|
|
|
"foo\n.+",
|
|
|
|
"foo\n.*",
|
|
|
|
".*foo.*",
|
|
|
|
".+foo.+",
|
|
|
|
"(?s:.*)",
|
|
|
|
"(?s:.+)",
|
|
|
|
"(?s:^.*foo$)",
|
|
|
|
"(?i:foo)",
|
|
|
|
"(?i:(foo|bar))",
|
|
|
|
"(?i:(foo1|foo2|bar))",
|
|
|
|
"^(?i:foo|oo)|(bar)$",
|
|
|
|
"(?i:(foo1|foo2|aaa|bbb|ccc|ddd|eee|fff|ggg|hhh|iii|lll|mmm|nnn|ooo|ppp|qqq|rrr|sss|ttt|uuu|vvv|www|xxx|yyy|zzz))",
|
|
|
|
"((.*)(bar|b|buzz)(.+)|foo)$",
|
|
|
|
"^$",
|
|
|
|
"(prometheus|api_prom)_api_v1_.+",
|
|
|
|
"10\\.0\\.(1|2)\\.+",
|
|
|
|
"10\\.0\\.(1|2).+",
|
|
|
|
"((fo(bar))|.+foo)",
|
|
|
|
// A long case sensitive alternation.
|
|
|
|
"zQPbMkNO|NNSPdvMi|iWuuSoAl|qbvKMimS|IecrXtPa|seTckYqt|NxnyHkgB|fIDlOgKb|UhlWIygH|OtNoJxHG|cUTkFVIV|mTgFIHjr|jQkoIDtE|PPMKxRXl|AwMfwVkQ|CQyMrTQJ|BzrqxVSi|nTpcWuhF|PertdywG|ZZDgCtXN|WWdDPyyE|uVtNQsKk|BdeCHvPZ|wshRnFlH|aOUIitIp|RxZeCdXT|CFZMslCj|AVBZRDxl|IzIGCnhw|ythYuWiz|oztXVXhl|VbLkwqQx|qvaUgyVC|VawUjPWC|ecloYJuj|boCLTdSU|uPrKeAZx|hrMWLWBq|JOnUNHRM|rYnujkPq|dDEdZhIj|DRrfvugG|yEGfDxVV|YMYdJWuP|PHUQZNWM|AmKNrLis|zTxndVfn|FPsHoJnc|EIulZTua|KlAPhdzg|ScHJJCLt|NtTfMzME|eMCwuFdo|SEpJVJbR|cdhXZeCx|sAVtBwRh|kVFEVcMI|jzJrxraA|tGLHTell|NNWoeSaw|DcOKSetX|UXZAJyka|THpMphDP|rizheevl|kDCBRidd|pCZZRqyu|pSygkitl|SwZGkAaW|wILOrfNX|QkwVOerj|kHOMxPDr|EwOVycJv|AJvtzQFS|yEOjKYYB|LizIINLL|JBRSsfcG|YPiUqqNl|IsdEbvee|MjEpGcBm|OxXZVgEQ|xClXGuxa|UzRCGFEb|buJbvfvA|IPZQxRet|oFYShsMc|oBHffuHO|bzzKrcBR|KAjzrGCl|IPUsAVls|OGMUMbIU|gyDccHuR|bjlalnDd|ZLWjeMna|fdsuIlxQ|dVXtiomV|XxedTjNg|XWMHlNoA|nnyqArQX|opfkWGhb|wYtnhdYb",
|
|
|
|
// An extremely long case sensitive alternation. This is a special
|
|
|
|
// case because the values share common prefixes rather than being
|
|
|
|
// entirely random. This is common in the real world. For example, the
|
|
|
|
// values of a label like kubernetes pod will often include the
|
|
|
|
// deployment name as a prefix.
|
|
|
|
"jyyfj00j0061|jyyfj00j0062|jyyfj94j0093|jyyfj99j0093|jyyfm01j0021|jyyfm02j0021|jyefj00j0192|jyefj00j0193|jyefj00j0194|jyefj00j0195|jyefj00j0196|jyefj00j0197|jyefj00j0290|jyefj00j0291|jyefj00j0292|jyefj00j0293|jyefj00j0294|jyefj00j0295|jyefj00j0296|jyefj00j0297|jyefj89j0394|jyefj90j0394|jyefj91j0394|jyefj95j0347|jyefj96j0322|jyefj96j0347|jyefj97j0322|jyefj97j0347|jyefj98j0322|jyefj98j0347|jyefj99j0320|jyefj99j0322|jyefj99j0323|jyefj99j0335|jyefj99j0336|jyefj99j0344|jyefj99j0347|jyefj99j0349|jyefj99j0351|jyeff00j0117|lyyfm01j0025|lyyfm01j0028|lyyfm01j0041|lyyfm01j0133|lyyfm01j0701|lyyfm02j0025|lyyfm02j0028|lyyfm02j0041|lyyfm02j0133|lyyfm02j0701|lyyfm03j0701|lyefj00j0775|lyefj00j0776|lyefj00j0777|lyefj00j0778|lyefj00j0779|lyefj00j0780|lyefj00j0781|lyefj00j0782|lyefj50j3807|lyefj50j3852|lyefj51j3807|lyefj51j3852|lyefj52j3807|lyefj52j3852|lyefj53j3807|lyefj53j3852|lyefj54j3807|lyefj54j3852|lyefj54j3886|lyefj55j3807|lyefj55j3852|lyefj55j3886|lyefj56j3807|lyefj56j3852|lyefj56j3886|lyefj57j3807|lyefj57j3852|lyefj57j3886|lyefj58j3807|lyefj58j3852|lyefj58j3886|lyefj59j3807|lyefj59j3852|lyefj59j3886|lyefj60j3807|lyefj60j3852|lyefj60j3886|lyefj61j3807|lyefj61j3852|lyefj61j3886|lyefj62j3807|lyefj62j3852|lyefj62j3886|lyefj63j3807|lyefj63j3852|lyefj63j3886|lyefj64j3807|lyefj64j3852|lyefj64j3886|lyefj65j3807|lyefj65j3852|lyefj65j3886|lyefj66j3807|lyefj66j3852|lyefj66j3886|lyefj67j3807|lyefj67j3852|lyefj67j3886|lyefj68j3807|lyefj68j3852|lyefj68j3886|lyefj69j3807|lyefj69j3846|lyefj69j3852|lyefj69j3886|lyefj70j3807|lyefj70j3846|lyefj70j3852|lyefj70j3886|lyefj71j3807|lyefj71j3846|lyefj71j3852|lyefj71j3886|lyefj72j3807|lyefj72j3846|lyefj72j3852|lyefj72j3886|lyefj73j3807|lyefj73j3846|lyefj73j3852|lyefj73j3886|lyefj74j3807|lyefj74j3846|lyefj74j3852|lyefj74j3886|lyefj75j3807|lyefj75j3808|lyefj75j3846|lyefj75j3852|lyefj75j3886|lyefj76j3732|lyefj76j3807|lyefj76j3808|lyefj76j3846|lyefj76j3852|lyefj76j3886|lyefj77j3732|lyefj77j3807|lyefj77j3808|lyefj77j3846|lyefj77j3852|lyefj77j3886|lyefj78j32
78|lyefj78j3732|lyefj78j3807|lyefj78j3808|lyefj78j3846|lyefj78j3852|lyefj78j3886|lyefj79j3732|lyefj79j3807|lyefj79j3808|lyefj79j3846|lyefj79j3852|lyefj79j3886|lyefj80j3732|lyefj80j3807|lyefj80j3808|lyefj80j3846|lyefj80j3852|lyefj80j3886|lyefj81j3732|lyefj81j3807|lyefj81j3808|lyefj81j3846|lyefj81j3852|lyefj81j3886|lyefj82j3732|lyefj82j3807|lyefj82j3808|lyefj82j3846|lyefj82j3852|lyefj82j3886|lyefj83j3732|lyefj83j3807|lyefj83j3808|lyefj83j3846|lyefj83j3852|lyefj83j3886|lyefj84j3732|lyefj84j3807|lyefj84j3808|lyefj84j3846|lyefj84j3852|lyefj84j3886|lyefj85j3732|lyefj85j3807|lyefj85j3808|lyefj85j3846|lyefj85j3852|lyefj85j3886|lyefj86j3278|lyefj86j3732|lyefj86j3807|lyefj86j3808|lyefj86j3846|lyefj86j3852|lyefj86j3886|lyefj87j3278|lyefj87j3732|lyefj87j3807|lyefj87j3808|lyefj87j3846|lyefj87j3852|lyefj87j3886|lyefj88j3732|lyefj88j3807|lyefj88j3808|lyefj88j3846|lyefj88j3852|lyefj88j3886|lyefj89j3732|lyefj89j3807|lyefj89j3808|lyefj89j3846|lyefj89j3852|lyefj89j3886|lyefj90j3732|lyefj90j3807|lyefj90j3808|lyefj90j3846|lyefj90j3852|lyefj90j3886|lyefj91j3732|lyefj91j3807|lyefj91j3808|lyefj91j3846|lyefj91j3852|lyefj91j3886|lyefj92j3732|lyefj92j3807|lyefj92j3808|lyefj92j3846|lyefj92j3852|lyefj92j3886|lyefj93j3732|lyefj93j3807|lyefj93j3808|lyefj93j3846|lyefj93j3852|lyefj93j3885|lyefj93j3886|lyefj94j3525|lyefj94j3732|lyefj94j3807|lyefj94j3808|lyefj94j3846|lyefj94j3852|lyefj94j3885|lyefj94j3886|lyefj95j3525|lyefj95j3732|lyefj95j3807|lyefj95j3808|lyefj95j3846|lyefj95j3852|lyefj95j3886|lyefj96j3732|lyefj96j3803|lyefj96j3807|lyefj96j3808|lyefj96j3846|lyefj96j3852|lyefj96j3886|lyefj97j3333|lyefj97j3732|lyefj97j3792|lyefj97j3803|lyefj97j3807|lyefj97j3808|lyefj97j3838|lyefj97j3843|lyefj97j3846|lyefj97j3852|lyefj97j3886|lyefj98j3083|lyefj98j3333|lyefj98j3732|lyefj98j3807|lyefj98j3808|lyefj98j3838|lyefj98j3843|lyefj98j3846|lyefj98j3852|lyefj98j3873|lyefj98j3877|lyefj98j3882|lyefj98j3886|lyefj99j2984|lyefj99j3083|lyefj99j3333|lyefj99j3732|lyefj99j3807|lyefj99j3808|lyefj99j3846|lyefj99j3849|lyefj99j
3852|lyefj99j3873|lyefj99j3877|lyefj99j3882|lyefj99j3884|lyefj99j3886|lyeff00j0106|lyeff00j01
|
|
|
|
// A long case insensitive alternation.
|
|
|
|
"(?i:(zQPbMkNO|NNSPdvMi|iWuuSoAl|qbvKMimS|IecrXtPa|seTckYqt|NxnyHkgB|fIDlOgKb|UhlWIygH|OtNoJxHG|cUTkFVIV|mTgFIHjr|jQkoIDtE|PPMKxRXl|AwMfwVkQ|CQyMrTQJ|BzrqxVSi|nTpcWuhF|PertdywG|ZZDgCtXN|WWdDPyyE|uVtNQsKk|BdeCHvPZ|wshRnFlH|aOUIitIp|RxZeCdXT|CFZMslCj|AVBZRDxl|IzIGCnhw|ythYuWiz|oztXVXhl|VbLkwqQx|qvaUgyVC|VawUjPWC|ecloYJuj|boCLTdSU|uPrKeAZx|hrMWLWBq|JOnUNHRM|rYnujkPq|dDEdZhIj|DRrfvugG|yEGfDxVV|YMYdJWuP|PHUQZNWM|AmKNrLis|zTxndVfn|FPsHoJnc|EIulZTua|KlAPhdzg|ScHJJCLt|NtTfMzME|eMCwuFdo|SEpJVJbR|cdhXZeCx|sAVtBwRh|kVFEVcMI|jzJrxraA|tGLHTell|NNWoeSaw|DcOKSetX|UXZAJyka|THpMphDP|rizheevl|kDCBRidd|pCZZRqyu|pSygkitl|SwZGkAaW|wILOrfNX|QkwVOerj|kHOMxPDr|EwOVycJv|AJvtzQFS|yEOjKYYB|LizIINLL|JBRSsfcG|YPiUqqNl|IsdEbvee|MjEpGcBm|OxXZVgEQ|xClXGuxa|UzRCGFEb|buJbvfvA|IPZQxRet|oFYShsMc|oBHffuHO|bzzKrcBR|KAjzrGCl|IPUsAVls|OGMUMbIU|gyDccHuR|bjlalnDd|ZLWjeMna|fdsuIlxQ|dVXtiomV|XxedTjNg|XWMHlNoA|nnyqArQX|opfkWGhb|wYtnhdYb))",
|
|
|
|
// A long case insensitive alternation where each entry ends with ".*".
|
|
|
|
"(?i:(zQPbMkNO.*|NNSPdvMi.*|iWuuSoAl.*|qbvKMimS.*|IecrXtPa.*|seTckYqt.*|NxnyHkgB.*|fIDlOgKb.*|UhlWIygH.*|OtNoJxHG.*|cUTkFVIV.*|mTgFIHjr.*|jQkoIDtE.*|PPMKxRXl.*|AwMfwVkQ.*|CQyMrTQJ.*|BzrqxVSi.*|nTpcWuhF.*|PertdywG.*|ZZDgCtXN.*|WWdDPyyE.*|uVtNQsKk.*|BdeCHvPZ.*|wshRnFlH.*|aOUIitIp.*|RxZeCdXT.*|CFZMslCj.*|AVBZRDxl.*|IzIGCnhw.*|ythYuWiz.*|oztXVXhl.*|VbLkwqQx.*|qvaUgyVC.*|VawUjPWC.*|ecloYJuj.*|boCLTdSU.*|uPrKeAZx.*|hrMWLWBq.*|JOnUNHRM.*|rYnujkPq.*|dDEdZhIj.*|DRrfvugG.*|yEGfDxVV.*|YMYdJWuP.*|PHUQZNWM.*|AmKNrLis.*|zTxndVfn.*|FPsHoJnc.*|EIulZTua.*|KlAPhdzg.*|ScHJJCLt.*|NtTfMzME.*|eMCwuFdo.*|SEpJVJbR.*|cdhXZeCx.*|sAVtBwRh.*|kVFEVcMI.*|jzJrxraA.*|tGLHTell.*|NNWoeSaw.*|DcOKSetX.*|UXZAJyka.*|THpMphDP.*|rizheevl.*|kDCBRidd.*|pCZZRqyu.*|pSygkitl.*|SwZGkAaW.*|wILOrfNX.*|QkwVOerj.*|kHOMxPDr.*|EwOVycJv.*|AJvtzQFS.*|yEOjKYYB.*|LizIINLL.*|JBRSsfcG.*|YPiUqqNl.*|IsdEbvee.*|MjEpGcBm.*|OxXZVgEQ.*|xClXGuxa.*|UzRCGFEb.*|buJbvfvA.*|IPZQxRet.*|oFYShsMc.*|oBHffuHO.*|bzzKrcBR.*|KAjzrGCl.*|IPUsAVls.*|OGMUMbIU.*|gyDccHuR.*|bjlalnDd.*|ZLWjeMna.*|fdsuIlxQ.*|dVXtiomV.*|XxedTjNg.*|XWMHlNoA.*|nnyqArQX.*|opfkWGhb.*|wYtnhdYb.*))",
|
|
|
|
// A long case insensitive alternation where each entry starts with ".*".
|
|
|
|
"(?i:(.*zQPbMkNO|.*NNSPdvMi|.*iWuuSoAl|.*qbvKMimS|.*IecrXtPa|.*seTckYqt|.*NxnyHkgB|.*fIDlOgKb|.*UhlWIygH|.*OtNoJxHG|.*cUTkFVIV|.*mTgFIHjr|.*jQkoIDtE|.*PPMKxRXl|.*AwMfwVkQ|.*CQyMrTQJ|.*BzrqxVSi|.*nTpcWuhF|.*PertdywG|.*ZZDgCtXN|.*WWdDPyyE|.*uVtNQsKk|.*BdeCHvPZ|.*wshRnFlH|.*aOUIitIp|.*RxZeCdXT|.*CFZMslCj|.*AVBZRDxl|.*IzIGCnhw|.*ythYuWiz|.*oztXVXhl|.*VbLkwqQx|.*qvaUgyVC|.*VawUjPWC|.*ecloYJuj|.*boCLTdSU|.*uPrKeAZx|.*hrMWLWBq|.*JOnUNHRM|.*rYnujkPq|.*dDEdZhIj|.*DRrfvugG|.*yEGfDxVV|.*YMYdJWuP|.*PHUQZNWM|.*AmKNrLis|.*zTxndVfn|.*FPsHoJnc|.*EIulZTua|.*KlAPhdzg|.*ScHJJCLt|.*NtTfMzME|.*eMCwuFdo|.*SEpJVJbR|.*cdhXZeCx|.*sAVtBwRh|.*kVFEVcMI|.*jzJrxraA|.*tGLHTell|.*NNWoeSaw|.*DcOKSetX|.*UXZAJyka|.*THpMphDP|.*rizheevl|.*kDCBRidd|.*pCZZRqyu|.*pSygkitl|.*SwZGkAaW|.*wILOrfNX|.*QkwVOerj|.*kHOMxPDr|.*EwOVycJv|.*AJvtzQFS|.*yEOjKYYB|.*LizIINLL|.*JBRSsfcG|.*YPiUqqNl|.*IsdEbvee|.*MjEpGcBm|.*OxXZVgEQ|.*xClXGuxa|.*UzRCGFEb|.*buJbvfvA|.*IPZQxRet|.*oFYShsMc|.*oBHffuHO|.*bzzKrcBR|.*KAjzrGCl|.*IPUsAVls|.*OGMUMbIU|.*gyDccHuR|.*bjlalnDd|.*ZLWjeMna|.*fdsuIlxQ|.*dVXtiomV|.*XxedTjNg|.*XWMHlNoA|.*nnyqArQX|.*opfkWGhb|.*wYtnhdYb))",
|
|
|
|
// Optional single character: ".?" (parsed as syntax.OpQuest).
|
|
|
|
"fo.?",
|
|
|
|
"foo.?",
|
|
|
|
"f.?o",
|
|
|
|
".*foo.?",
|
|
|
|
".?foo.+",
|
|
|
|
"foo.?|bar",
|
2020-06-26 02:49:09 -07:00
|
|
|
}
|
2024-01-25 01:40:57 -08:00
|
|
|
values = []string{
|
|
|
|
"foo", " foo bar", "bar", "buzz\nbar", "bar foo", "bfoo", "\n", "\nfoo", "foo\n", "hello foo world", "hello foo\n world", "",
|
|
|
|
"FOO", "Foo", "OO", "Oo", "\nfoo\n", strings.Repeat("f", 20), "prometheus", "prometheus_api_v1", "prometheus_api_v1_foo",
|
|
|
|
"10.0.1.20", "10.0.2.10", "10.0.3.30", "10.0.4.40",
|
2024-05-07 07:33:37 -07:00
|
|
|
"foofoo0", "foofoo", "😀foo0",
|
2020-06-26 02:49:09 -07:00
|
|
|
|
2024-01-25 01:40:57 -08:00
|
|
|
// Values matching / not matching the test regexps on long alternations.
|
|
|
|
"zQPbMkNO", "zQPbMkNo", "jyyfj00j0061", "jyyfj00j006", "jyyfj00j00612", "NNSPdvMi", "NNSPdvMiXXX", "NNSPdvMixxx", "nnSPdvMi", "nnSPdvMiXXX",
|
2024-05-13 08:05:31 -07:00
|
|
|
|
|
|
|
// Invalid utf8
|
|
|
|
"\xfefoo",
|
|
|
|
"foo\xfe",
|
|
|
|
"\xfd",
|
|
|
|
"\xff\xff",
|
2024-01-25 01:40:57 -08:00
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
func TestFastRegexMatcher_MatchString(t *testing.T) {
|
|
|
|
// Run the test both against a set of predefined values and a set of random ones.
|
|
|
|
testValues := append([]string{}, values...)
|
|
|
|
testValues = append(testValues, generateRandomValues()...)
|
|
|
|
|
|
|
|
for _, r := range regexes {
|
|
|
|
r := r
|
|
|
|
for _, v := range testValues {
|
|
|
|
v := v
|
2024-04-03 02:10:35 -07:00
|
|
|
t.Run(readable(r)+` on "`+readable(v)+`"`, func(t *testing.T) {
|
2024-01-25 01:40:57 -08:00
|
|
|
t.Parallel()
|
|
|
|
m, err := NewFastRegexMatcher(r)
|
|
|
|
require.NoError(t, err)
|
|
|
|
re := regexp.MustCompile("^(?:" + r + ")$")
|
|
|
|
require.Equal(t, re.MatchString(v), m.MatchString(v))
|
|
|
|
})
|
|
|
|
}
|
2020-06-26 02:49:09 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-04-03 02:10:35 -07:00
|
|
|
// readable shortens s so it can be used in a test name. Strings of at most
// maxReadableStringLen bytes are returned unchanged; longer strings are cut to
// at most maxReadableStringLen bytes and suffixed with "...".
func readable(s string) string {
	const maxReadableStringLen = 40

	// Nothing to cut: a string of exactly the maximum length fits as is.
	if len(s) <= maxReadableStringLen {
		return s
	}

	// Avoid slicing through the middle of a multi-byte UTF-8 sequence: while the
	// first dropped byte is a continuation byte, move the cut back. The walk is
	// bounded by the longest possible encoding so that invalid input made only
	// of continuation bytes still yields a (nearly) full-length prefix.
	cut := maxReadableStringLen
	for cut > maxReadableStringLen-utf8.UTFMax+1 && !utf8.RuneStart(s[cut]) {
		cut--
	}
	return s[:cut] + "..."
}
|
|
|
|
|
2020-06-26 02:49:09 -07:00
|
|
|
func TestOptimizeConcatRegex(t *testing.T) {
|
|
|
|
cases := []struct {
|
2020-07-07 01:38:04 -07:00
|
|
|
regex string
|
|
|
|
prefix string
|
|
|
|
suffix string
|
|
|
|
contains string
|
2020-06-26 02:49:09 -07:00
|
|
|
}{
|
2020-07-07 01:38:04 -07:00
|
|
|
{regex: "foo(hello|bar)", prefix: "foo", suffix: "", contains: ""},
|
|
|
|
{regex: "foo(hello|bar)world", prefix: "foo", suffix: "world", contains: ""},
|
|
|
|
{regex: "foo.*", prefix: "foo", suffix: "", contains: ""},
|
|
|
|
{regex: "foo.*hello.*bar", prefix: "foo", suffix: "bar", contains: "hello"},
|
|
|
|
{regex: ".*foo", prefix: "", suffix: "foo", contains: ""},
|
|
|
|
{regex: "^.*foo$", prefix: "", suffix: "foo", contains: ""},
|
|
|
|
{regex: ".*foo.*", prefix: "", suffix: "", contains: "foo"},
|
|
|
|
{regex: ".*foo.*bar.*", prefix: "", suffix: "", contains: "foo"},
|
|
|
|
{regex: ".*(foo|bar).*", prefix: "", suffix: "", contains: ""},
|
|
|
|
{regex: ".*[abc].*", prefix: "", suffix: "", contains: ""},
|
2020-10-06 05:16:26 -07:00
|
|
|
{regex: ".*((?i)abc).*", prefix: "", suffix: "", contains: ""},
|
|
|
|
{regex: ".*(?i:abc).*", prefix: "", suffix: "", contains: ""},
|
|
|
|
{regex: "(?i:abc).*", prefix: "", suffix: "", contains: ""},
|
|
|
|
{regex: ".*(?i:abc)", prefix: "", suffix: "", contains: ""},
|
|
|
|
{regex: ".*(?i:abc)def.*", prefix: "", suffix: "", contains: "def"},
|
|
|
|
{regex: "(?i).*(?-i:abc)def", prefix: "", suffix: "", contains: "abc"},
|
|
|
|
{regex: ".*(?msU:abc).*", prefix: "", suffix: "", contains: "abc"},
|
2020-10-12 04:17:29 -07:00
|
|
|
{regex: "[aA]bc.*", prefix: "", suffix: "", contains: "bc"},
|
2024-01-25 01:40:57 -08:00
|
|
|
{regex: "^5..$", prefix: "5", suffix: "", contains: ""},
|
|
|
|
{regex: "^release.*", prefix: "release", suffix: "", contains: ""},
|
|
|
|
{regex: "^env-[0-9]+laio[1]?[^0-9].*", prefix: "env-", suffix: "", contains: "laio"},
|
2020-06-26 02:49:09 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
for _, c := range cases {
|
|
|
|
parsed, err := syntax.Parse(c.regex, syntax.Perl)
|
2020-10-29 02:43:23 -07:00
|
|
|
require.NoError(t, err)
|
2020-06-26 02:49:09 -07:00
|
|
|
|
2020-07-07 01:38:04 -07:00
|
|
|
prefix, suffix, contains := optimizeConcatRegex(parsed)
|
2020-10-29 02:43:23 -07:00
|
|
|
require.Equal(t, c.prefix, prefix)
|
|
|
|
require.Equal(t, c.suffix, suffix)
|
|
|
|
require.Equal(t, c.contains, contains)
|
2020-06-26 02:49:09 -07:00
|
|
|
}
|
|
|
|
}
|
2022-02-08 02:03:20 -08:00
|
|
|
|
2024-01-25 01:40:57 -08:00
|
|
|
// Refer to https://github.com/prometheus/prometheus/issues/2651.
|
|
|
|
func TestFindSetMatches(t *testing.T) {
|
|
|
|
for _, c := range []struct {
|
|
|
|
pattern string
|
|
|
|
expMatches []string
|
|
|
|
expCaseSensitive bool
|
|
|
|
}{
|
|
|
|
// Single value, coming from a `bar=~"foo"` selector.
|
|
|
|
{"foo", []string{"foo"}, true},
|
|
|
|
{"^foo", []string{"foo"}, true},
|
|
|
|
{"^foo$", []string{"foo"}, true},
|
|
|
|
// Simple sets alternates.
|
|
|
|
{"foo|bar|zz", []string{"foo", "bar", "zz"}, true},
|
|
|
|
// Simple sets alternate and concat (bar|baz is parsed as "ba[rz]").
|
|
|
|
{"foo|bar|baz", []string{"foo", "bar", "baz"}, true},
|
|
|
|
// Simple sets alternate and concat and capture
|
|
|
|
{"foo|bar|baz|(zz)", []string{"foo", "bar", "baz", "zz"}, true},
|
|
|
|
// Simple sets alternate and concat and alternates with empty matches
|
|
|
|
// parsed as b(ar|(?:)|uzz) where b(?:) means literal b.
|
|
|
|
{"bar|b|buzz", []string{"bar", "b", "buzz"}, true},
|
|
|
|
// Skip nested capture groups.
|
|
|
|
{"^((bar|b|buzz))$", []string{"bar", "b", "buzz"}, true},
|
|
|
|
// Skip outer anchors (it's enforced anyway at the root).
|
|
|
|
{"^(bar|b|buzz)$", []string{"bar", "b", "buzz"}, true},
|
|
|
|
{"^(?:prod|production)$", []string{"prod", "production"}, true},
|
|
|
|
// Do not optimize regexp with inner anchors.
|
|
|
|
{"(bar|b|b^uz$z)", nil, false},
|
|
|
|
// Do not optimize regexp with empty string matcher.
|
|
|
|
{"^$|Running", nil, false},
|
|
|
|
// Simple sets containing escaped characters.
|
|
|
|
{"fo\\.o|bar\\?|\\^baz", []string{"fo.o", "bar?", "^baz"}, true},
|
|
|
|
// using charclass
|
|
|
|
{"[abc]d", []string{"ad", "bd", "cd"}, true},
|
|
|
|
// high low charset different => A(B[CD]|EF)|BC[XY]
|
|
|
|
{"ABC|ABD|AEF|BCX|BCY", []string{"ABC", "ABD", "AEF", "BCX", "BCY"}, true},
|
|
|
|
// triple concat
|
|
|
|
{"api_(v1|prom)_push", []string{"api_v1_push", "api_prom_push"}, true},
|
|
|
|
// triple concat with multiple alternates
|
|
|
|
{"(api|rpc)_(v1|prom)_push", []string{"api_v1_push", "api_prom_push", "rpc_v1_push", "rpc_prom_push"}, true},
|
|
|
|
{"(api|rpc)_(v1|prom)_(push|query)", []string{"api_v1_push", "api_v1_query", "api_prom_push", "api_prom_query", "rpc_v1_push", "rpc_v1_query", "rpc_prom_push", "rpc_prom_query"}, true},
|
|
|
|
// class starting with "-"
|
|
|
|
{"[-1-2][a-c]", []string{"-a", "-b", "-c", "1a", "1b", "1c", "2a", "2b", "2c"}, true},
|
|
|
|
{"[1^3]", []string{"1", "3", "^"}, true},
|
|
|
|
// OpPlus with concat
|
|
|
|
{"(.+)/(foo|bar)", nil, false},
|
|
|
|
// Simple sets containing special characters without escaping.
|
|
|
|
{"fo.o|bar?|^baz", nil, false},
|
|
|
|
// case sensitive wrapper.
|
|
|
|
{"(?i)foo", []string{"FOO"}, false},
|
|
|
|
// case sensitive wrapper on alternate.
|
|
|
|
{"(?i)foo|bar|baz", []string{"FOO", "BAR", "BAZ", "BAr", "BAz"}, false},
|
|
|
|
// mixed case sensitivity.
|
|
|
|
{"(api|rpc)_(v1|prom)_((?i)push|query)", nil, false},
|
|
|
|
// mixed case sensitivity concatenation only without capture group.
|
|
|
|
{"api_v1_(?i)push", nil, false},
|
|
|
|
// mixed case sensitivity alternation only without capture group.
|
|
|
|
{"api|(?i)rpc", nil, false},
|
|
|
|
// case sensitive after unsetting insensitivity.
|
|
|
|
{"rpc|(?i)(?-i)api", []string{"rpc", "api"}, true},
|
|
|
|
// case sensitive after unsetting insensitivity in all alternation options.
|
|
|
|
{"(?i)((?-i)api|(?-i)rpc)", []string{"api", "rpc"}, true},
|
|
|
|
// mixed case sensitivity after unsetting insensitivity.
|
|
|
|
{"(?i)rpc|(?-i)api", nil, false},
|
|
|
|
// too high charset combination
|
|
|
|
{"(api|rpc)_[^0-9]", nil, false},
|
|
|
|
// too many combinations
|
|
|
|
{"[a-z][a-z]", nil, false},
|
|
|
|
} {
|
|
|
|
c := c
|
|
|
|
t.Run(c.pattern, func(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
parsed, err := syntax.Parse(c.pattern, syntax.Perl)
|
|
|
|
require.NoError(t, err)
|
|
|
|
matches, actualCaseSensitive := findSetMatches(parsed)
|
|
|
|
require.Equal(t, c.expMatches, matches)
|
|
|
|
require.Equal(t, c.expCaseSensitive, actualCaseSensitive)
|
|
|
|
|
|
|
|
if c.expCaseSensitive {
|
|
|
|
// When the regexp is case sensitive, we want to ensure that the
|
|
|
|
// set matches are maintained in the final matcher.
|
|
|
|
r, err := NewFastRegexMatcher(c.pattern)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Equal(t, c.expMatches, r.SetMatches())
|
|
|
|
}
|
|
|
|
})
|
2022-02-08 02:03:20 -08:00
|
|
|
}
|
2024-01-25 01:40:57 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestFastRegexMatcher_SetMatches_ShouldReturnACopy(t *testing.T) {
|
|
|
|
m, err := NewFastRegexMatcher("a|b")
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Equal(t, []string{"a", "b"}, m.SetMatches())
|
|
|
|
|
|
|
|
// Manipulate the returned slice.
|
|
|
|
matches := m.SetMatches()
|
|
|
|
matches[0] = "xxx"
|
|
|
|
matches[1] = "yyy"
|
|
|
|
|
|
|
|
// Ensure that if we call SetMatches() again we get the original one.
|
|
|
|
require.Equal(t, []string{"a", "b"}, m.SetMatches())
|
|
|
|
}
|
|
|
|
|
|
|
|
func BenchmarkFastRegexMatcher(b *testing.B) {
|
|
|
|
texts := generateRandomValues()
|
|
|
|
|
2022-02-08 02:03:20 -08:00
|
|
|
for _, r := range regexes {
|
2024-01-25 01:40:57 -08:00
|
|
|
b.Run(getTestNameFromRegexp(r), func(b *testing.B) {
|
2022-02-08 02:03:20 -08:00
|
|
|
m, err := NewFastRegexMatcher(r)
|
|
|
|
require.NoError(b, err)
|
2024-01-25 01:40:57 -08:00
|
|
|
|
2022-02-08 02:03:20 -08:00
|
|
|
b.ResetTimer()
|
|
|
|
for i := 0; i < b.N; i++ {
|
2024-01-25 01:40:57 -08:00
|
|
|
for _, text := range texts {
|
|
|
|
_ = m.MatchString(text)
|
|
|
|
}
|
2022-02-08 02:03:20 -08:00
|
|
|
}
|
|
|
|
})
|
2024-01-25 01:40:57 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestStringMatcherFromRegexp(t *testing.T) {
|
|
|
|
for _, c := range []struct {
|
|
|
|
pattern string
|
|
|
|
exp StringMatcher
|
|
|
|
}{
|
|
|
|
{".*", anyStringWithoutNewlineMatcher{}},
|
|
|
|
{".*?", anyStringWithoutNewlineMatcher{}},
|
|
|
|
{"(?s:.*)", trueMatcher{}},
|
|
|
|
{"(.*)", anyStringWithoutNewlineMatcher{}},
|
|
|
|
{"^.*$", anyStringWithoutNewlineMatcher{}},
|
|
|
|
{".+", &anyNonEmptyStringMatcher{matchNL: false}},
|
|
|
|
{"(?s:.+)", &anyNonEmptyStringMatcher{matchNL: true}},
|
|
|
|
{"^.+$", &anyNonEmptyStringMatcher{matchNL: false}},
|
|
|
|
{"(.+)", &anyNonEmptyStringMatcher{matchNL: false}},
|
|
|
|
{"", emptyStringMatcher{}},
|
|
|
|
{"^$", emptyStringMatcher{}},
|
|
|
|
{"^foo$", &equalStringMatcher{s: "foo", caseSensitive: true}},
|
|
|
|
{"^(?i:foo)$", &equalStringMatcher{s: "FOO", caseSensitive: false}},
|
|
|
|
{"^((?i:foo)|(bar))$", orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "bar", caseSensitive: true}})},
|
|
|
|
{`(?i:((foo|bar)))`, orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "BAR", caseSensitive: false}})},
|
|
|
|
{`(?i:((foo1|foo2|bar)))`, orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})},
|
|
|
|
{"^((?i:foo|oo)|(bar))$", orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "OO", caseSensitive: false}, &equalStringMatcher{s: "bar", caseSensitive: true}})},
|
|
|
|
{"(?i:(foo1|foo2|bar))", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})},
|
|
|
|
{".*foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}},
|
|
|
|
{"(.*)foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}},
|
|
|
|
{"(.*)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}},
|
|
|
|
{"(.+)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: false}, right: anyStringWithoutNewlineMatcher{}}},
|
|
|
|
{"^.+foo.+", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: false}, right: &anyNonEmptyStringMatcher{matchNL: false}}},
|
|
|
|
{"^(.*)(foo)(.*)$", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}},
|
|
|
|
{"^(.*)(foo|foobar)(.*)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}},
|
|
|
|
{"^(.*)(foo|foobar)(.+)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: anyStringWithoutNewlineMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: false}}},
|
|
|
|
{"^(.*)(bar|b|buzz)(.+)$", &containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringWithoutNewlineMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: false}}},
|
|
|
|
{"10\\.0\\.(1|2)\\.+", nil},
|
|
|
|
{"10\\.0\\.(1|2).+", &containsStringMatcher{substrings: []string{"10.0.1", "10.0.2"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: false}}},
|
|
|
|
{"^.+foo", &literalSuffixStringMatcher{left: &anyNonEmptyStringMatcher{}, suffix: "foo", suffixCaseSensitive: true}},
|
|
|
|
{"foo-.*$", &literalPrefixStringMatcher{prefix: "foo-", prefixCaseSensitive: true, right: anyStringWithoutNewlineMatcher{}}},
|
|
|
|
{"(prometheus|api_prom)_api_v1_.+", &containsStringMatcher{substrings: []string{"prometheus_api_v1_", "api_prom_api_v1_"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: false}}},
|
|
|
|
{"^((.*)(bar|b|buzz)(.+)|foo)$", orStringMatcher([]StringMatcher{&containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringWithoutNewlineMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: false}}, &equalStringMatcher{s: "foo", caseSensitive: true}})},
|
|
|
|
{"((fo(bar))|.+foo)", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "fobar", caseSensitive: true}}), &literalSuffixStringMatcher{suffix: "foo", suffixCaseSensitive: true, left: &anyNonEmptyStringMatcher{matchNL: false}}})},
|
|
|
|
{"(.+)/(gateway|cortex-gw|cortex-gw-internal)", &containsStringMatcher{substrings: []string{"/gateway", "/cortex-gw", "/cortex-gw-internal"}, left: &anyNonEmptyStringMatcher{matchNL: false}, right: nil}},
|
|
|
|
// we don't support case insensitive matching for contains.
|
|
|
|
// This is because there's no strings.IndexOfFold function.
|
|
|
|
// We can revisit later if this is really popular by using strings.ToUpper.
|
|
|
|
{"^(.*)((?i)foo|foobar)(.*)$", nil},
|
|
|
|
{"(api|rpc)_(v1|prom)_((?i)push|query)", nil},
|
|
|
|
{"[a-z][a-z]", nil},
|
|
|
|
{"[1^3]", nil},
|
|
|
|
{".*foo.*bar.*", nil},
|
|
|
|
{`\d*`, nil},
|
|
|
|
{".", nil},
|
|
|
|
{"/|/bar.*", &literalPrefixStringMatcher{prefix: "/", prefixCaseSensitive: true, right: orStringMatcher{emptyStringMatcher{}, &literalPrefixStringMatcher{prefix: "bar", prefixCaseSensitive: true, right: anyStringWithoutNewlineMatcher{}}}}},
|
|
|
|
// This one is not supported because `stringMatcherFromRegexp` is not reentrant for syntax.OpConcat.
|
|
|
|
// It would make the code too complex to handle it.
|
|
|
|
{"(.+)/(foo.*|bar$)", nil},
|
|
|
|
// Case sensitive alternate with same literal prefix and .* suffix.
|
|
|
|
{"(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixStringMatcher{prefix: "xyz-016a-ixb-", prefixCaseSensitive: true, right: orStringMatcher{&literalPrefixStringMatcher{prefix: "dp", prefixCaseSensitive: true, right: anyStringWithoutNewlineMatcher{}}, &literalPrefixStringMatcher{prefix: "op", prefixCaseSensitive: true, right: anyStringWithoutNewlineMatcher{}}}}},
|
|
|
|
// Case insensitive alternate with same literal prefix and .* suffix.
|
|
|
|
{"(?i:(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*))", &literalPrefixStringMatcher{prefix: "XYZ-016A-IXB-", prefixCaseSensitive: false, right: orStringMatcher{&literalPrefixStringMatcher{prefix: "DP", prefixCaseSensitive: false, right: anyStringWithoutNewlineMatcher{}}, &literalPrefixStringMatcher{prefix: "OP", prefixCaseSensitive: false, right: anyStringWithoutNewlineMatcher{}}}}},
|
|
|
|
{"(?i)(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixStringMatcher{prefix: "XYZ-016A-IXB-", prefixCaseSensitive: false, right: orStringMatcher{&literalPrefixStringMatcher{prefix: "DP", prefixCaseSensitive: false, right: anyStringWithoutNewlineMatcher{}}, &literalPrefixStringMatcher{prefix: "OP", prefixCaseSensitive: false, right: anyStringWithoutNewlineMatcher{}}}}},
|
|
|
|
// Concatenated variable length selectors are not supported.
|
|
|
|
{"foo.*.*", nil},
|
|
|
|
{"foo.+.+", nil},
|
|
|
|
{".*.*foo", nil},
|
|
|
|
{".+.+foo", nil},
|
|
|
|
{"aaa.?.?", nil},
|
|
|
|
{"aaa.?.*", nil},
|
|
|
|
// Regexps with ".?".
|
|
|
|
{"ext.?|xfs", orStringMatcher{&literalPrefixStringMatcher{prefix: "ext", prefixCaseSensitive: true, right: &zeroOrOneCharacterStringMatcher{matchNL: false}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}},
|
|
|
|
{"(?s)(ext.?|xfs)", orStringMatcher{&literalPrefixStringMatcher{prefix: "ext", prefixCaseSensitive: true, right: &zeroOrOneCharacterStringMatcher{matchNL: true}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}},
|
|
|
|
{"foo.?", &literalPrefixStringMatcher{prefix: "foo", prefixCaseSensitive: true, right: &zeroOrOneCharacterStringMatcher{matchNL: false}}},
|
|
|
|
{"f.?o", nil},
|
|
|
|
} {
|
|
|
|
c := c
|
|
|
|
t.Run(c.pattern, func(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
parsed, err := syntax.Parse(c.pattern, syntax.Perl)
|
|
|
|
require.NoError(t, err)
|
|
|
|
matches := stringMatcherFromRegexp(parsed)
|
|
|
|
require.Equal(t, c.exp, matches)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestStringMatcherFromRegexp_LiteralPrefix(t *testing.T) {
|
|
|
|
for _, c := range []struct {
|
|
|
|
pattern string
|
|
|
|
expectedLiteralPrefixMatchers int
|
|
|
|
expectedMatches []string
|
|
|
|
expectedNotMatches []string
|
|
|
|
}{
|
|
|
|
// Case sensitive.
|
|
|
|
{
|
|
|
|
pattern: "(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)",
|
|
|
|
expectedLiteralPrefixMatchers: 3,
|
|
|
|
expectedMatches: []string{"xyz-016a-ixb-dp", "xyz-016a-ixb-dpXXX", "xyz-016a-ixb-op", "xyz-016a-ixb-opXXX"},
|
|
|
|
expectedNotMatches: []string{"XYZ-016a-ixb-dp", "xyz-016a-ixb-d", "XYZ-016a-ixb-op", "xyz-016a-ixb-o", "xyz", "dp", "xyz-016a-ixb-dp\n"},
|
|
|
|
},
|
|
|
|
|
|
|
|
// Case insensitive.
|
|
|
|
{
|
|
|
|
pattern: "(?i)(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)",
|
|
|
|
expectedLiteralPrefixMatchers: 3,
|
|
|
|
expectedMatches: []string{"xyz-016a-ixb-dp", "XYZ-016a-ixb-dpXXX", "xyz-016a-ixb-op", "XYZ-016a-ixb-opXXX"},
|
|
|
|
expectedNotMatches: []string{"xyz-016a-ixb-d", "xyz", "dp", "xyz-016a-ixb-dp\n"},
|
|
|
|
},
|
|
|
|
|
|
|
|
// Nested literal prefixes, case sensitive.
|
|
|
|
{
|
|
|
|
pattern: "(xyz-(aaa-(111.*)|bbb-(222.*)))|(xyz-(aaa-(333.*)|bbb-(444.*)))",
|
|
|
|
expectedLiteralPrefixMatchers: 10,
|
|
|
|
expectedMatches: []string{"xyz-aaa-111", "xyz-aaa-111XXX", "xyz-aaa-333", "xyz-aaa-333XXX", "xyz-bbb-222", "xyz-bbb-222XXX", "xyz-bbb-444", "xyz-bbb-444XXX"},
|
|
|
|
expectedNotMatches: []string{"XYZ-aaa-111", "xyz-aaa-11", "xyz-aaa-222", "xyz-bbb-111"},
|
|
|
|
},
|
|
|
|
|
|
|
|
// Nested literal prefixes, case insensitive.
|
|
|
|
{
|
|
|
|
pattern: "(?i)(xyz-(aaa-(111.*)|bbb-(222.*)))|(xyz-(aaa-(333.*)|bbb-(444.*)))",
|
|
|
|
expectedLiteralPrefixMatchers: 10,
|
|
|
|
expectedMatches: []string{"xyz-aaa-111", "XYZ-aaa-111XXX", "xyz-aaa-333", "xyz-AAA-333XXX", "xyz-bbb-222", "xyz-BBB-222XXX", "XYZ-bbb-444", "xyz-bbb-444XXX"},
|
|
|
|
expectedNotMatches: []string{"xyz-aaa-11", "xyz-aaa-222", "xyz-bbb-111"},
|
|
|
|
},
|
|
|
|
|
|
|
|
// Mixed case sensitivity.
|
|
|
|
{
|
|
|
|
pattern: "(xyz-((?i)(aaa.*|bbb.*)))",
|
|
|
|
expectedLiteralPrefixMatchers: 3,
|
|
|
|
expectedMatches: []string{"xyz-aaa", "xyz-AAA", "xyz-aaaXXX", "xyz-AAAXXX", "xyz-bbb", "xyz-BBBXXX"},
|
|
|
|
expectedNotMatches: []string{"XYZ-aaa", "xyz-aa", "yz-aaa", "aaa"},
|
|
|
|
},
|
|
|
|
} {
|
|
|
|
t.Run(c.pattern, func(t *testing.T) {
|
|
|
|
parsed, err := syntax.Parse(c.pattern, syntax.Perl)
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
matcher := stringMatcherFromRegexp(parsed)
|
|
|
|
require.NotNil(t, matcher)
|
|
|
|
|
|
|
|
re := regexp.MustCompile("^" + c.pattern + "$")
|
|
|
|
|
|
|
|
// Pre-condition check: ensure it contains literalPrefixStringMatcher.
|
|
|
|
numPrefixMatchers := 0
|
|
|
|
visitStringMatcher(matcher, func(matcher StringMatcher) {
|
|
|
|
if _, ok := matcher.(*literalPrefixStringMatcher); ok {
|
|
|
|
numPrefixMatchers++
|
|
|
|
}
|
|
|
|
})
|
|
|
|
|
|
|
|
require.Equal(t, c.expectedLiteralPrefixMatchers, numPrefixMatchers)
|
|
|
|
|
|
|
|
for _, value := range c.expectedMatches {
|
|
|
|
require.Truef(t, matcher.Matches(value), "Value: %s", value)
|
|
|
|
|
|
|
|
// Ensure the golang regexp engine would return the same.
|
|
|
|
require.Truef(t, re.MatchString(value), "Value: %s", value)
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, value := range c.expectedNotMatches {
|
|
|
|
require.Falsef(t, matcher.Matches(value), "Value: %s", value)
|
|
|
|
|
|
|
|
// Ensure the golang regexp engine would return the same.
|
|
|
|
require.Falsef(t, re.MatchString(value), "Value: %s", value)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestStringMatcherFromRegexp_LiteralSuffix(t *testing.T) {
|
|
|
|
for _, c := range []struct {
|
|
|
|
pattern string
|
|
|
|
expectedLiteralSuffixMatchers int
|
|
|
|
expectedMatches []string
|
|
|
|
expectedNotMatches []string
|
|
|
|
}{
|
|
|
|
// Case sensitive.
|
|
|
|
{
|
|
|
|
pattern: "(.*xyz-016a-ixb-dp|.*xyz-016a-ixb-op)",
|
|
|
|
expectedLiteralSuffixMatchers: 2,
|
|
|
|
expectedMatches: []string{"xyz-016a-ixb-dp", "XXXxyz-016a-ixb-dp", "xyz-016a-ixb-op", "XXXxyz-016a-ixb-op"},
|
|
|
|
expectedNotMatches: []string{"XYZ-016a-ixb-dp", "yz-016a-ixb-dp", "XYZ-016a-ixb-op", "xyz-016a-ixb-o", "xyz", "dp", "\nxyz-016a-ixb-dp"},
|
|
|
|
},
|
|
|
|
|
|
|
|
// Case insensitive.
|
|
|
|
{
|
|
|
|
pattern: "(?i)(.*xyz-016a-ixb-dp|.*xyz-016a-ixb-op)",
|
|
|
|
expectedLiteralSuffixMatchers: 2,
|
|
|
|
expectedMatches: []string{"xyz-016a-ixb-dp", "XYZ-016a-ixb-dp", "XXXxyz-016a-ixb-dp", "XyZ-016a-ixb-op", "XXXxyz-016a-ixb-op"},
|
|
|
|
expectedNotMatches: []string{"yz-016a-ixb-dp", "xyz-016a-ixb-o", "xyz", "dp", "\nxyz-016a-ixb-dp"},
|
|
|
|
},
|
|
|
|
|
|
|
|
// Nested literal suffixes, case sensitive.
|
|
|
|
{
|
|
|
|
pattern: "(.*aaa|.*bbb(.*ccc|.*ddd))",
|
|
|
|
expectedLiteralSuffixMatchers: 3,
|
|
|
|
expectedMatches: []string{"aaa", "XXXaaa", "bbbccc", "XXXbbbccc", "XXXbbbXXXccc", "bbbddd", "bbbddd", "XXXbbbddd", "XXXbbbXXXddd", "bbbXXXccc", "aaabbbccc", "aaabbbddd"},
|
|
|
|
expectedNotMatches: []string{"AAA", "aa", "Xaa", "BBBCCC", "bb", "Xbb", "bbccc", "bbbcc", "bbbdd"},
|
|
|
|
},
|
|
|
|
|
|
|
|
// Mixed case sensitivity.
|
|
|
|
{
|
|
|
|
pattern: "(.*aaa|.*bbb((?i)(.*ccc|.*ddd)))",
|
|
|
|
expectedLiteralSuffixMatchers: 3,
|
|
|
|
expectedMatches: []string{"aaa", "XXXaaa", "bbbccc", "bbbCCC", "bbbXXXCCC", "bbbddd", "bbbDDD", "bbbXXXddd", "bbbXXXDDD"},
|
|
|
|
expectedNotMatches: []string{"AAA", "XXXAAA", "BBBccc", "BBBCCC", "aaaBBB"},
|
|
|
|
},
|
|
|
|
} {
|
|
|
|
t.Run(c.pattern, func(t *testing.T) {
|
|
|
|
parsed, err := syntax.Parse(c.pattern, syntax.Perl)
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
matcher := stringMatcherFromRegexp(parsed)
|
|
|
|
require.NotNil(t, matcher)
|
|
|
|
|
|
|
|
re := regexp.MustCompile("^" + c.pattern + "$")
|
|
|
|
|
|
|
|
// Pre-condition check: ensure it contains literalSuffixStringMatcher.
|
|
|
|
numSuffixMatchers := 0
|
|
|
|
visitStringMatcher(matcher, func(matcher StringMatcher) {
|
|
|
|
if _, ok := matcher.(*literalSuffixStringMatcher); ok {
|
|
|
|
numSuffixMatchers++
|
|
|
|
}
|
|
|
|
})
|
|
|
|
|
|
|
|
require.Equal(t, c.expectedLiteralSuffixMatchers, numSuffixMatchers)
|
|
|
|
|
|
|
|
for _, value := range c.expectedMatches {
|
|
|
|
require.Truef(t, matcher.Matches(value), "Value: %s", value)
|
|
|
|
|
|
|
|
// Ensure the golang regexp engine would return the same.
|
|
|
|
require.Truef(t, re.MatchString(value), "Value: %s", value)
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, value := range c.expectedNotMatches {
|
|
|
|
require.Falsef(t, matcher.Matches(value), "Value: %s", value)
|
|
|
|
|
|
|
|
// Ensure the golang regexp engine would return the same.
|
|
|
|
require.Falsef(t, re.MatchString(value), "Value: %s", value)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestStringMatcherFromRegexp_Quest(t *testing.T) {
|
|
|
|
for _, c := range []struct {
|
|
|
|
pattern string
|
|
|
|
expectedZeroOrOneMatchers int
|
|
|
|
expectedMatches []string
|
|
|
|
expectedNotMatches []string
|
|
|
|
}{
|
|
|
|
// Not match newline.
|
|
|
|
{
|
|
|
|
pattern: "test.?",
|
|
|
|
expectedZeroOrOneMatchers: 1,
|
|
|
|
expectedMatches: []string{"test", "test!"},
|
|
|
|
expectedNotMatches: []string{"test\n", "tes", "test!!"},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
pattern: ".?test",
|
|
|
|
expectedZeroOrOneMatchers: 1,
|
|
|
|
expectedMatches: []string{"test", "!test"},
|
|
|
|
expectedNotMatches: []string{"\ntest", "tes", "test!"},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
pattern: "(aaa.?|bbb.?)",
|
|
|
|
expectedZeroOrOneMatchers: 2,
|
|
|
|
expectedMatches: []string{"aaa", "aaaX", "bbb", "bbbX"},
|
|
|
|
expectedNotMatches: []string{"aa", "aaaXX", "aaa\n", "bb", "bbbXX", "bbb\n"},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
pattern: ".*aaa.?",
|
|
|
|
expectedZeroOrOneMatchers: 1,
|
|
|
|
expectedMatches: []string{"aaa", "Xaaa", "aaaX", "XXXaaa", "XXXaaaX"},
|
|
|
|
expectedNotMatches: []string{"aa", "aaaXX", "XXXaaaXXX", "XXXaaa\n"},
|
|
|
|
},
|
|
|
|
|
|
|
|
// Match newline.
|
|
|
|
{
|
|
|
|
pattern: "(?s)test.?",
|
|
|
|
expectedZeroOrOneMatchers: 1,
|
|
|
|
expectedMatches: []string{"test", "test!", "test\n"},
|
|
|
|
expectedNotMatches: []string{"tes", "test!!", "test\n\n"},
|
|
|
|
},
|
|
|
|
|
|
|
|
// Mixed flags (a part matches newline another doesn't).
|
|
|
|
{
|
|
|
|
pattern: "(aaa.?|((?s).?bbb.+))",
|
|
|
|
expectedZeroOrOneMatchers: 2,
|
|
|
|
expectedMatches: []string{"aaa", "aaaX", "bbbX", "XbbbX", "bbbXXX", "\nbbbX"},
|
|
|
|
expectedNotMatches: []string{"aa", "aaa\n", "Xbbb", "\nbbb"},
|
|
|
|
},
|
|
|
|
} {
|
|
|
|
t.Run(c.pattern, func(t *testing.T) {
|
|
|
|
parsed, err := syntax.Parse(c.pattern, syntax.Perl)
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
matcher := stringMatcherFromRegexp(parsed)
|
|
|
|
require.NotNil(t, matcher)
|
|
|
|
|
|
|
|
re := regexp.MustCompile("^" + c.pattern + "$")
|
|
|
|
|
|
|
|
// Pre-condition check: ensure it contains zeroOrOneCharacterStringMatcher.
|
|
|
|
numZeroOrOneMatchers := 0
|
|
|
|
visitStringMatcher(matcher, func(matcher StringMatcher) {
|
|
|
|
if _, ok := matcher.(*zeroOrOneCharacterStringMatcher); ok {
|
|
|
|
numZeroOrOneMatchers++
|
|
|
|
}
|
|
|
|
})
|
|
|
|
|
|
|
|
require.Equal(t, c.expectedZeroOrOneMatchers, numZeroOrOneMatchers)
|
|
|
|
|
|
|
|
for _, value := range c.expectedMatches {
|
|
|
|
require.Truef(t, matcher.Matches(value), "Value: %s", value)
|
|
|
|
|
|
|
|
// Ensure the golang regexp engine would return the same.
|
|
|
|
require.Truef(t, re.MatchString(value), "Value: %s", value)
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, value := range c.expectedNotMatches {
|
|
|
|
require.Falsef(t, matcher.Matches(value), "Value: %s", value)
|
|
|
|
|
|
|
|
// Ensure the golang regexp engine would return the same.
|
|
|
|
require.Falsef(t, re.MatchString(value), "Value: %s", value)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func randString(randGenerator *rand.Rand, length int) string {
|
|
|
|
b := make([]rune, length)
|
|
|
|
for i := range b {
|
|
|
|
b[i] = asciiRunes[randGenerator.Intn(len(asciiRunes))]
|
|
|
|
}
|
|
|
|
return string(b)
|
|
|
|
}
|
|
|
|
|
|
|
|
func randStrings(randGenerator *rand.Rand, many, length int) []string {
|
|
|
|
out := make([]string, 0, many)
|
|
|
|
for i := 0; i < many; i++ {
|
|
|
|
out = append(out, randString(randGenerator, length))
|
|
|
|
}
|
|
|
|
return out
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestOptimizeEqualStringMatchers(t *testing.T) {
|
|
|
|
tests := map[string]struct {
|
|
|
|
input StringMatcher
|
|
|
|
expectedValues []string
|
|
|
|
expectedCaseSensitive bool
|
|
|
|
}{
|
|
|
|
"should skip optimization on orStringMatcher with containsStringMatcher": {
|
|
|
|
input: orStringMatcher{
|
|
|
|
&equalStringMatcher{s: "FOO", caseSensitive: true},
|
|
|
|
&containsStringMatcher{substrings: []string{"a", "b", "c"}},
|
|
|
|
},
|
|
|
|
expectedValues: nil,
|
|
|
|
},
|
|
|
|
"should run optimization on orStringMatcher with equalStringMatcher and same case sensitivity": {
|
|
|
|
input: orStringMatcher{
|
|
|
|
&equalStringMatcher{s: "FOO", caseSensitive: true},
|
|
|
|
&equalStringMatcher{s: "bar", caseSensitive: true},
|
|
|
|
&equalStringMatcher{s: "baz", caseSensitive: true},
|
|
|
|
},
|
|
|
|
expectedValues: []string{"FOO", "bar", "baz"},
|
|
|
|
expectedCaseSensitive: true,
|
|
|
|
},
|
|
|
|
"should skip optimization on orStringMatcher with equalStringMatcher but different case sensitivity": {
|
|
|
|
input: orStringMatcher{
|
|
|
|
&equalStringMatcher{s: "FOO", caseSensitive: true},
|
|
|
|
&equalStringMatcher{s: "bar", caseSensitive: false},
|
|
|
|
&equalStringMatcher{s: "baz", caseSensitive: true},
|
|
|
|
},
|
|
|
|
expectedValues: nil,
|
|
|
|
},
|
|
|
|
"should run optimization on orStringMatcher with nested orStringMatcher and equalStringMatcher, and same case sensitivity": {
|
|
|
|
input: orStringMatcher{
|
|
|
|
&equalStringMatcher{s: "FOO", caseSensitive: true},
|
|
|
|
orStringMatcher{
|
|
|
|
&equalStringMatcher{s: "bar", caseSensitive: true},
|
|
|
|
&equalStringMatcher{s: "xxx", caseSensitive: true},
|
|
|
|
},
|
|
|
|
&equalStringMatcher{s: "baz", caseSensitive: true},
|
|
|
|
},
|
|
|
|
expectedValues: []string{"FOO", "bar", "xxx", "baz"},
|
|
|
|
expectedCaseSensitive: true,
|
|
|
|
},
|
|
|
|
"should skip optimization on orStringMatcher with nested orStringMatcher and equalStringMatcher, but different case sensitivity": {
|
|
|
|
input: orStringMatcher{
|
|
|
|
&equalStringMatcher{s: "FOO", caseSensitive: true},
|
|
|
|
orStringMatcher{
|
|
|
|
// Case sensitivity is different within items at the same level.
|
|
|
|
&equalStringMatcher{s: "bar", caseSensitive: true},
|
|
|
|
&equalStringMatcher{s: "xxx", caseSensitive: false},
|
|
|
|
},
|
|
|
|
&equalStringMatcher{s: "baz", caseSensitive: true},
|
|
|
|
},
|
|
|
|
expectedValues: nil,
|
|
|
|
},
|
|
|
|
"should skip optimization on orStringMatcher with nested orStringMatcher and equalStringMatcher, but different case sensitivity in the nested one": {
|
|
|
|
input: orStringMatcher{
|
|
|
|
&equalStringMatcher{s: "FOO", caseSensitive: true},
|
|
|
|
// Case sensitivity is different between the parent and child.
|
|
|
|
orStringMatcher{
|
|
|
|
&equalStringMatcher{s: "bar", caseSensitive: false},
|
|
|
|
&equalStringMatcher{s: "xxx", caseSensitive: false},
|
|
|
|
},
|
|
|
|
&equalStringMatcher{s: "baz", caseSensitive: true},
|
|
|
|
},
|
|
|
|
expectedValues: nil,
|
|
|
|
},
|
|
|
|
"should return unchanged values on few case insensitive matchers": {
|
|
|
|
input: orStringMatcher{
|
|
|
|
&equalStringMatcher{s: "FOO", caseSensitive: false},
|
|
|
|
orStringMatcher{
|
|
|
|
&equalStringMatcher{s: "bAr", caseSensitive: false},
|
|
|
|
},
|
|
|
|
&equalStringMatcher{s: "baZ", caseSensitive: false},
|
|
|
|
},
|
|
|
|
expectedValues: []string{"FOO", "bAr", "baZ"},
|
|
|
|
expectedCaseSensitive: false,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
for testName, testData := range tests {
|
|
|
|
t.Run(testName, func(t *testing.T) {
|
|
|
|
actualMatcher := optimizeEqualStringMatchers(testData.input, 0)
|
|
|
|
|
|
|
|
if testData.expectedValues == nil {
|
|
|
|
require.IsType(t, testData.input, actualMatcher)
|
|
|
|
} else {
|
|
|
|
require.IsType(t, &equalMultiStringSliceMatcher{}, actualMatcher)
|
|
|
|
require.Equal(t, testData.expectedValues, actualMatcher.(*equalMultiStringSliceMatcher).values)
|
|
|
|
require.Equal(t, testData.expectedCaseSensitive, actualMatcher.(*equalMultiStringSliceMatcher).caseSensitive)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestNewEqualMultiStringMatcher(t *testing.T) {
|
|
|
|
tests := map[string]struct {
|
|
|
|
values []string
|
|
|
|
caseSensitive bool
|
|
|
|
expectedValuesMap map[string]struct{}
|
|
|
|
expectedValuesList []string
|
|
|
|
}{
|
|
|
|
"few case sensitive values": {
|
|
|
|
values: []string{"a", "B"},
|
|
|
|
caseSensitive: true,
|
|
|
|
expectedValuesList: []string{"a", "B"},
|
|
|
|
},
|
|
|
|
"few case insensitive values": {
|
|
|
|
values: []string{"a", "B"},
|
|
|
|
caseSensitive: false,
|
|
|
|
expectedValuesList: []string{"a", "B"},
|
|
|
|
},
|
|
|
|
"many case sensitive values": {
|
|
|
|
values: []string{"a", "B", "c", "D", "e", "F", "g", "H", "i", "L", "m", "N", "o", "P", "q", "r"},
|
|
|
|
caseSensitive: true,
|
|
|
|
expectedValuesMap: map[string]struct{}{"a": {}, "B": {}, "c": {}, "D": {}, "e": {}, "F": {}, "g": {}, "H": {}, "i": {}, "L": {}, "m": {}, "N": {}, "o": {}, "P": {}, "q": {}, "r": {}},
|
|
|
|
},
|
|
|
|
"many case insensitive values": {
|
|
|
|
values: []string{"a", "B", "c", "D", "e", "F", "g", "H", "i", "L", "m", "N", "o", "P", "q", "r"},
|
|
|
|
caseSensitive: false,
|
|
|
|
expectedValuesMap: map[string]struct{}{"a": {}, "b": {}, "c": {}, "d": {}, "e": {}, "f": {}, "g": {}, "h": {}, "i": {}, "l": {}, "m": {}, "n": {}, "o": {}, "p": {}, "q": {}, "r": {}},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
for testName, testData := range tests {
|
|
|
|
t.Run(testName, func(t *testing.T) {
|
|
|
|
matcher := newEqualMultiStringMatcher(testData.caseSensitive, len(testData.values))
|
|
|
|
for _, v := range testData.values {
|
|
|
|
matcher.add(v)
|
|
|
|
}
|
|
|
|
if testData.expectedValuesMap != nil {
|
|
|
|
require.IsType(t, &equalMultiStringMapMatcher{}, matcher)
|
|
|
|
require.Equal(t, testData.expectedValuesMap, matcher.(*equalMultiStringMapMatcher).values)
|
|
|
|
require.Equal(t, testData.caseSensitive, matcher.(*equalMultiStringMapMatcher).caseSensitive)
|
|
|
|
}
|
|
|
|
if testData.expectedValuesList != nil {
|
|
|
|
require.IsType(t, &equalMultiStringSliceMatcher{}, matcher)
|
|
|
|
require.Equal(t, testData.expectedValuesList, matcher.(*equalMultiStringSliceMatcher).values)
|
|
|
|
require.Equal(t, testData.caseSensitive, matcher.(*equalMultiStringSliceMatcher).caseSensitive)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestEqualMultiStringMatcher_Matches(t *testing.T) {
|
|
|
|
tests := map[string]struct {
|
|
|
|
values []string
|
|
|
|
caseSensitive bool
|
|
|
|
expectedMatches []string
|
|
|
|
expectedNotMatches []string
|
|
|
|
}{
|
|
|
|
"few case sensitive values": {
|
|
|
|
values: []string{"a", "B"},
|
|
|
|
caseSensitive: true,
|
|
|
|
expectedMatches: []string{"a", "B"},
|
|
|
|
expectedNotMatches: []string{"A", "b"},
|
|
|
|
},
|
|
|
|
"few case insensitive values": {
|
|
|
|
values: []string{"a", "B"},
|
|
|
|
caseSensitive: false,
|
|
|
|
expectedMatches: []string{"a", "A", "b", "B"},
|
|
|
|
expectedNotMatches: []string{"c", "C"},
|
|
|
|
},
|
|
|
|
"many case sensitive values": {
|
|
|
|
values: []string{"a", "B", "c", "D", "e", "F", "g", "H", "i", "L", "m", "N", "o", "P", "q", "r"},
|
|
|
|
caseSensitive: true,
|
|
|
|
expectedMatches: []string{"a", "B"},
|
|
|
|
expectedNotMatches: []string{"A", "b"},
|
|
|
|
},
|
|
|
|
"many case insensitive values": {
|
|
|
|
values: []string{"a", "B", "c", "D", "e", "F", "g", "H", "i", "L", "m", "N", "o", "P", "q", "r"},
|
|
|
|
caseSensitive: false,
|
|
|
|
expectedMatches: []string{"a", "A", "b", "B"},
|
|
|
|
expectedNotMatches: []string{"x", "X"},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
for testName, testData := range tests {
|
|
|
|
t.Run(testName, func(t *testing.T) {
|
|
|
|
matcher := newEqualMultiStringMatcher(testData.caseSensitive, len(testData.values))
|
|
|
|
for _, v := range testData.values {
|
|
|
|
matcher.add(v)
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, v := range testData.expectedMatches {
|
|
|
|
require.True(t, matcher.Matches(v), "value: %s", v)
|
|
|
|
}
|
|
|
|
for _, v := range testData.expectedNotMatches {
|
|
|
|
require.False(t, matcher.Matches(v), "value: %s", v)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestFindEqualStringMatchers(t *testing.T) {
|
|
|
|
type match struct {
|
|
|
|
s string
|
|
|
|
caseSensitive bool
|
|
|
|
}
|
|
|
|
|
|
|
|
// Utility to call findEqualStringMatchers() and collect all callback invocations.
|
|
|
|
findEqualStringMatchersAndCollectMatches := func(input StringMatcher) (matches []match, ok bool) {
|
|
|
|
ok = findEqualStringMatchers(input, func(matcher *equalStringMatcher) bool {
|
|
|
|
matches = append(matches, match{matcher.s, matcher.caseSensitive})
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
t.Run("empty matcher", func(t *testing.T) {
|
|
|
|
actualMatches, actualOk := findEqualStringMatchersAndCollectMatches(emptyStringMatcher{})
|
|
|
|
require.False(t, actualOk)
|
|
|
|
require.Empty(t, actualMatches)
|
|
|
|
})
|
|
|
|
|
|
|
|
t.Run("concat of literal matchers (case sensitive)", func(t *testing.T) {
|
|
|
|
actualMatches, actualOk := findEqualStringMatchersAndCollectMatches(
|
|
|
|
orStringMatcher{
|
|
|
|
&equalStringMatcher{s: "test-1", caseSensitive: true},
|
|
|
|
&equalStringMatcher{s: "test-2", caseSensitive: true},
|
|
|
|
},
|
|
|
|
)
|
|
|
|
|
|
|
|
require.True(t, actualOk)
|
|
|
|
require.Equal(t, []match{{"test-1", true}, {"test-2", true}}, actualMatches)
|
|
|
|
})
|
|
|
|
|
|
|
|
t.Run("concat of literal matchers (case insensitive)", func(t *testing.T) {
|
|
|
|
actualMatches, actualOk := findEqualStringMatchersAndCollectMatches(
|
|
|
|
orStringMatcher{
|
|
|
|
&equalStringMatcher{s: "test-1", caseSensitive: false},
|
|
|
|
&equalStringMatcher{s: "test-2", caseSensitive: false},
|
|
|
|
},
|
|
|
|
)
|
|
|
|
|
|
|
|
require.True(t, actualOk)
|
|
|
|
require.Equal(t, []match{{"test-1", false}, {"test-2", false}}, actualMatches)
|
|
|
|
})
|
|
|
|
|
|
|
|
t.Run("concat of literal matchers (mixed case)", func(t *testing.T) {
|
|
|
|
actualMatches, actualOk := findEqualStringMatchersAndCollectMatches(
|
|
|
|
orStringMatcher{
|
|
|
|
&equalStringMatcher{s: "test-1", caseSensitive: false},
|
|
|
|
&equalStringMatcher{s: "test-2", caseSensitive: true},
|
|
|
|
},
|
|
|
|
)
|
|
|
|
|
|
|
|
require.True(t, actualOk)
|
|
|
|
require.Equal(t, []match{{"test-1", false}, {"test-2", true}}, actualMatches)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
// This benchmark is used to find a good threshold to use to apply the optimization
|
|
|
|
// done by optimizeEqualStringMatchers().
|
|
|
|
func BenchmarkOptimizeEqualStringMatchers(b *testing.B) {
|
|
|
|
randGenerator := rand.New(rand.NewSource(time.Now().UnixNano()))
|
|
|
|
|
|
|
|
// Generate variable lengths random texts to match against.
|
|
|
|
texts := append([]string{}, randStrings(randGenerator, 10, 10)...)
|
|
|
|
texts = append(texts, randStrings(randGenerator, 5, 30)...)
|
|
|
|
texts = append(texts, randStrings(randGenerator, 1, 100)...)
|
|
|
|
|
|
|
|
for numAlternations := 2; numAlternations <= 256; numAlternations *= 2 {
|
|
|
|
for _, caseSensitive := range []bool{true, false} {
|
|
|
|
b.Run(fmt.Sprintf("alternations: %d case sensitive: %t", numAlternations, caseSensitive), func(b *testing.B) {
|
|
|
|
// Generate a regex with the expected number of alternations.
|
|
|
|
re := strings.Join(randStrings(randGenerator, numAlternations, 10), "|")
|
|
|
|
if !caseSensitive {
|
|
|
|
re = "(?i:(" + re + "))"
|
|
|
|
}
|
|
|
|
|
|
|
|
parsed, err := syntax.Parse(re, syntax.Perl)
|
|
|
|
require.NoError(b, err)
|
|
|
|
|
|
|
|
unoptimized := stringMatcherFromRegexpInternal(parsed)
|
|
|
|
require.IsType(b, orStringMatcher{}, unoptimized)
|
|
|
|
|
|
|
|
optimized := optimizeEqualStringMatchers(unoptimized, 0)
|
|
|
|
if numAlternations < minEqualMultiStringMatcherMapThreshold {
|
|
|
|
require.IsType(b, &equalMultiStringSliceMatcher{}, optimized)
|
|
|
|
} else {
|
|
|
|
require.IsType(b, &equalMultiStringMapMatcher{}, optimized)
|
|
|
|
}
|
|
|
|
|
|
|
|
b.Run("without optimizeEqualStringMatchers()", func(b *testing.B) {
|
|
|
|
for n := 0; n < b.N; n++ {
|
|
|
|
for _, t := range texts {
|
|
|
|
unoptimized.Matches(t)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
|
|
|
|
|
|
|
b.Run("with optimizeEqualStringMatchers()", func(b *testing.B) {
|
|
|
|
for n := 0; n < b.N; n++ {
|
|
|
|
for _, t := range texts {
|
|
|
|
optimized.Matches(t)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestZeroOrOneCharacterStringMatcher(t *testing.T) {
|
2024-05-13 06:36:55 -07:00
|
|
|
t.Run("match newline", func(t *testing.T) {
|
|
|
|
matcher := &zeroOrOneCharacterStringMatcher{matchNL: true}
|
|
|
|
require.True(t, matcher.Matches(""))
|
|
|
|
require.True(t, matcher.Matches("x"))
|
|
|
|
require.True(t, matcher.Matches("\n"))
|
|
|
|
require.False(t, matcher.Matches("xx"))
|
|
|
|
require.False(t, matcher.Matches("\n\n"))
|
|
|
|
})
|
|
|
|
|
|
|
|
t.Run("do not match newline", func(t *testing.T) {
|
|
|
|
matcher := &zeroOrOneCharacterStringMatcher{matchNL: false}
|
|
|
|
require.True(t, matcher.Matches(""))
|
|
|
|
require.True(t, matcher.Matches("x"))
|
|
|
|
require.False(t, matcher.Matches("\n"))
|
|
|
|
require.False(t, matcher.Matches("xx"))
|
|
|
|
require.False(t, matcher.Matches("\n\n"))
|
|
|
|
})
|
|
|
|
|
|
|
|
t.Run("unicode", func(t *testing.T) {
|
|
|
|
// Just for documentation purposes, emoji1 is 1 rune, emoji2 is 2 runes.
|
|
|
|
// Having this in mind, will make future readers fixing tests easier.
|
|
|
|
emoji1 := "😀"
|
|
|
|
emoji2 := "❤️"
|
|
|
|
require.Equal(t, 1, utf8.RuneCountInString(emoji1))
|
|
|
|
require.Equal(t, 2, utf8.RuneCountInString(emoji2))
|
|
|
|
|
|
|
|
matcher := &zeroOrOneCharacterStringMatcher{matchNL: true}
|
|
|
|
require.True(t, matcher.Matches(emoji1))
|
|
|
|
require.False(t, matcher.Matches(emoji2))
|
|
|
|
require.False(t, matcher.Matches(emoji1+emoji1))
|
|
|
|
require.False(t, matcher.Matches("x"+emoji1))
|
|
|
|
require.False(t, matcher.Matches(emoji1+"x"))
|
|
|
|
require.False(t, matcher.Matches(emoji1+emoji2))
|
|
|
|
})
|
2024-05-13 08:44:07 -07:00
|
|
|
|
|
|
|
t.Run("invalid unicode", func(t *testing.T) {
|
|
|
|
// Just for reference, we also compare to what `^.?$` regular expression matches.
|
|
|
|
re := regexp.MustCompile("^.?$")
|
|
|
|
matcher := &zeroOrOneCharacterStringMatcher{matchNL: true}
|
|
|
|
|
|
|
|
requireMatches := func(s string, expected bool) {
|
|
|
|
t.Helper()
|
|
|
|
require.Equal(t, expected, matcher.Matches(s))
|
|
|
|
require.Equal(t, re.MatchString(s), matcher.Matches(s))
|
|
|
|
}
|
|
|
|
|
|
|
|
requireMatches("\xff", true)
|
|
|
|
requireMatches("x\xff", false)
|
|
|
|
requireMatches("\xffx", false)
|
|
|
|
requireMatches("\xff\xfe", false)
|
|
|
|
})
|
2024-05-13 06:36:55 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
func BenchmarkZeroOrOneCharacterStringMatcher(b *testing.B) {
|
|
|
|
type benchCase struct {
|
|
|
|
str string
|
|
|
|
matches bool
|
|
|
|
}
|
|
|
|
|
|
|
|
emoji1 := "😀"
|
|
|
|
emoji2 := "❤️"
|
|
|
|
cases := []benchCase{
|
|
|
|
{"", true},
|
|
|
|
{"x", true},
|
|
|
|
{"\n", true},
|
|
|
|
{"xx", false},
|
|
|
|
{"\n\n", false},
|
|
|
|
{emoji1, true},
|
|
|
|
{emoji2, false},
|
|
|
|
{emoji1 + emoji1, false},
|
|
|
|
{strings.Repeat("x", 100), false},
|
|
|
|
{strings.Repeat(emoji1, 100), false},
|
|
|
|
{strings.Repeat(emoji2, 100), false},
|
|
|
|
}
|
|
|
|
|
2024-01-25 01:40:57 -08:00
|
|
|
matcher := &zeroOrOneCharacterStringMatcher{matchNL: true}
|
2024-05-13 06:36:55 -07:00
|
|
|
b.ResetTimer()
|
|
|
|
|
|
|
|
for n := 0; n < b.N; n++ {
|
|
|
|
c := cases[n%len(cases)]
|
|
|
|
got := matcher.Matches(c.str)
|
|
|
|
if got != c.matches {
|
|
|
|
b.Fatalf("unexpected result for %q: got %t, want %t", c.str, got, c.matches)
|
|
|
|
}
|
|
|
|
}
|
2024-01-25 01:40:57 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestLiteralPrefixStringMatcher(t *testing.T) {
|
|
|
|
m := &literalPrefixStringMatcher{prefix: "mar", prefixCaseSensitive: true, right: &emptyStringMatcher{}}
|
|
|
|
require.True(t, m.Matches("mar"))
|
|
|
|
require.False(t, m.Matches("marco"))
|
|
|
|
require.False(t, m.Matches("ma"))
|
|
|
|
require.False(t, m.Matches("mAr"))
|
|
|
|
|
|
|
|
m = &literalPrefixStringMatcher{prefix: "mar", prefixCaseSensitive: false, right: &emptyStringMatcher{}}
|
|
|
|
require.True(t, m.Matches("mar"))
|
|
|
|
require.False(t, m.Matches("marco"))
|
|
|
|
require.False(t, m.Matches("ma"))
|
|
|
|
require.True(t, m.Matches("mAr"))
|
|
|
|
|
|
|
|
m = &literalPrefixStringMatcher{prefix: "mar", prefixCaseSensitive: true, right: &equalStringMatcher{s: "co", caseSensitive: false}}
|
|
|
|
require.True(t, m.Matches("marco"))
|
|
|
|
require.True(t, m.Matches("marCO"))
|
|
|
|
require.False(t, m.Matches("MARco"))
|
|
|
|
require.False(t, m.Matches("mar"))
|
|
|
|
require.False(t, m.Matches("marcopracucci"))
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestLiteralSuffixStringMatcher(t *testing.T) {
|
|
|
|
m := &literalSuffixStringMatcher{left: &emptyStringMatcher{}, suffix: "co", suffixCaseSensitive: true}
|
|
|
|
require.True(t, m.Matches("co"))
|
|
|
|
require.False(t, m.Matches("marco"))
|
|
|
|
require.False(t, m.Matches("coo"))
|
|
|
|
require.False(t, m.Matches("Co"))
|
|
|
|
|
|
|
|
m = &literalSuffixStringMatcher{left: &emptyStringMatcher{}, suffix: "co", suffixCaseSensitive: false}
|
|
|
|
require.True(t, m.Matches("co"))
|
|
|
|
require.False(t, m.Matches("marco"))
|
|
|
|
require.False(t, m.Matches("coo"))
|
|
|
|
require.True(t, m.Matches("Co"))
|
|
|
|
|
|
|
|
m = &literalSuffixStringMatcher{left: &equalStringMatcher{s: "mar", caseSensitive: false}, suffix: "co", suffixCaseSensitive: true}
|
|
|
|
require.True(t, m.Matches("marco"))
|
|
|
|
require.True(t, m.Matches("MARco"))
|
|
|
|
require.False(t, m.Matches("marCO"))
|
|
|
|
require.False(t, m.Matches("mar"))
|
|
|
|
require.False(t, m.Matches("marcopracucci"))
|
|
|
|
|
|
|
|
m = &literalSuffixStringMatcher{left: &equalStringMatcher{s: "mar", caseSensitive: false}, suffix: "co", suffixCaseSensitive: false}
|
|
|
|
require.True(t, m.Matches("marco"))
|
|
|
|
require.True(t, m.Matches("MARco"))
|
|
|
|
require.True(t, m.Matches("marCO"))
|
|
|
|
require.False(t, m.Matches("mar"))
|
|
|
|
require.False(t, m.Matches("marcopracucci"))
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestHasPrefixCaseInsensitive(t *testing.T) {
|
|
|
|
require.True(t, hasPrefixCaseInsensitive("marco", "mar"))
|
|
|
|
require.True(t, hasPrefixCaseInsensitive("mArco", "mar"))
|
|
|
|
require.True(t, hasPrefixCaseInsensitive("marco", "MaR"))
|
|
|
|
require.True(t, hasPrefixCaseInsensitive("marco", "marco"))
|
|
|
|
require.True(t, hasPrefixCaseInsensitive("mArco", "marco"))
|
|
|
|
|
|
|
|
require.False(t, hasPrefixCaseInsensitive("marco", "a"))
|
|
|
|
require.False(t, hasPrefixCaseInsensitive("marco", "abcdefghi"))
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestHasSuffixCaseInsensitive(t *testing.T) {
|
|
|
|
require.True(t, hasSuffixCaseInsensitive("marco", "rco"))
|
|
|
|
require.True(t, hasSuffixCaseInsensitive("marco", "RcO"))
|
|
|
|
require.True(t, hasSuffixCaseInsensitive("marco", "marco"))
|
|
|
|
require.False(t, hasSuffixCaseInsensitive("marco", "a"))
|
|
|
|
require.False(t, hasSuffixCaseInsensitive("marco", "abcdefghi"))
|
|
|
|
}
|
|
|
|
|
|
|
|
// getTestNameFromRegexp returns a name derived from re that is at most 32
// bytes long. Strings of 32 bytes or fewer are returned unchanged. Longer
// strings are cut at 32 bytes, or slightly earlier when byte 32 falls in the
// middle of a multi-byte UTF-8 sequence, so that the result never ends with a
// partial rune.
func getTestNameFromRegexp(re string) string {
	const maxLen = 32

	if len(re) <= maxLen {
		return re
	}

	// A UTF-8 sequence has at most utf8.UTFMax bytes, so the start of the rune
	// that straddles the cut point (if any) is within the last UTFMax-1 bytes
	// before it. re[cut] is in range because len(re) > maxLen.
	for cut := maxLen; cut > maxLen-utf8.UTFMax; cut-- {
		if utf8.RuneStart(re[cut]) {
			return re[:cut]
		}
	}

	// No rune start found nearby: the input is not valid UTF-8 around the cut
	// point, so fall back to the plain byte truncation.
	return re[:maxLen]
}
|
|
|
|
|
|
|
|
func generateRandomValues() []string {
|
|
|
|
// Init the random seed with a constant, so that it doesn't change between runs.
|
|
|
|
randGenerator := rand.New(rand.NewSource(1))
|
|
|
|
|
|
|
|
// Generate variable lengths random texts to match against.
|
|
|
|
texts := append([]string{}, randStrings(randGenerator, 10, 10)...)
|
|
|
|
texts = append(texts, randStrings(randGenerator, 5, 30)...)
|
|
|
|
texts = append(texts, randStrings(randGenerator, 1, 100)...)
|
|
|
|
texts = append(texts, "foo"+randString(randGenerator, 50))
|
|
|
|
texts = append(texts, randString(randGenerator, 50)+"foo")
|
|
|
|
|
|
|
|
return texts
|
|
|
|
}
|
|
|
|
|
|
|
|
func visitStringMatcher(matcher StringMatcher, callback func(matcher StringMatcher)) {
|
|
|
|
callback(matcher)
|
|
|
|
|
|
|
|
switch casted := matcher.(type) {
|
|
|
|
case *containsStringMatcher:
|
|
|
|
if casted.left != nil {
|
|
|
|
visitStringMatcher(casted.left, callback)
|
|
|
|
}
|
|
|
|
if casted.right != nil {
|
|
|
|
visitStringMatcher(casted.right, callback)
|
|
|
|
}
|
|
|
|
|
|
|
|
case *literalPrefixStringMatcher:
|
|
|
|
visitStringMatcher(casted.right, callback)
|
|
|
|
|
|
|
|
case *literalSuffixStringMatcher:
|
|
|
|
visitStringMatcher(casted.left, callback)
|
|
|
|
|
|
|
|
case orStringMatcher:
|
|
|
|
for _, entry := range casted {
|
|
|
|
visitStringMatcher(entry, callback)
|
|
|
|
}
|
2022-02-08 02:03:20 -08:00
|
|
|
|
2024-04-01 09:06:05 -07:00
|
|
|
// No nested matchers for the following ones.
|
2024-01-25 01:40:57 -08:00
|
|
|
case emptyStringMatcher:
|
|
|
|
case *equalStringMatcher:
|
|
|
|
case *equalMultiStringSliceMatcher:
|
|
|
|
case *equalMultiStringMapMatcher:
|
|
|
|
case anyStringWithoutNewlineMatcher:
|
|
|
|
case *anyNonEmptyStringMatcher:
|
|
|
|
case trueMatcher:
|
2022-02-08 02:03:20 -08:00
|
|
|
}
|
|
|
|
}
|