Swap rules lexer for much faster one.

This swaps github.com/kivikakk/golex for github.com/cznic/golex.

The old lexer would have taken 3.5 years to load a set of 5000 test rules
(due to quadratic time complexity in the input length), whereas the new one takes only 32ms.
Furthermore, since the new lexer is embedded differently, this gets rid of the
global parser variables and makes the rule loader fully reentrant without a
lock.
This commit is contained in:
Julius Volz 2013-07-11 18:38:44 +02:00
parent 2d538bf55a
commit 64b0ade171
7 changed files with 1450 additions and 808 deletions

View file

@@ -17,7 +17,8 @@ parser.y.go: parser.y
go tool yacc -o parser.y.go -v "" parser.y go tool yacc -o parser.y.go -v "" parser.y
lexer.l.go: parser.y.go lexer.l lexer.l.go: parser.y.go lexer.l
golex lexer.l # This is golex from https://github.com/cznic/golex.
golex -o="lexer.l.go" lexer.l
clean: clean:
rm lexer.l.go parser.y.go rm lexer.l.go parser.y.go

View file

@@ -1,25 +1,53 @@
/* Copyright 2013 Prometheus Team // Copyright 2013 Prometheus Team
* Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
* You may obtain a copy of the License at // You may obtain a copy of the License at
* //
* http: *www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
* //
* Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
* limitations under the License. */ // limitations under the License.
%{ %{
package rules package rules
import ( import (
"fmt"
"strconv" "strconv"
"strings" "strings"
clientmodel "github.com/prometheus/client_golang/model" clientmodel "github.com/prometheus/client_golang/model"
) )
// Lex is called by the parser generated by "go tool yacc" to obtain each
// token. The method is opened before the matching rules block and closed at
// the end of the file.
func (lexer *RulesLexer) Lex(lval *yySymType) int {
// Internal lexer states.
const (
S_INITIAL = iota
S_COMMENTS
)
// We simulate multiple start symbols for closely-related grammars via dummy tokens. See
// http://www.gnu.org/software/bison/manual/html_node/Multiple-start_002dsymbols.html
// Reason: we want to be able to parse lists of named rules as well as single expressions.
if lexer.startToken != 0 {
startToken := lexer.startToken
lexer.startToken = 0
return startToken
}
c := lexer.current
currentState := 0
if lexer.empty {
c, lexer.empty = lexer.getChar(), false
}
%} %}
D [0-9] D [0-9]
@@ -28,44 +56,50 @@ U [smhdwy]
%x S_COMMENTS %x S_COMMENTS
%yyc c
%yyn c = lexer.getChar()
%yyt currentState
%% %%
. { yypos++; REJECT } lexer.buf = lexer.buf[:0] // The code before the first rule executed before every scan cycle (rule #0 / state 0 action)
\n { yyline++; yypos = 1; REJECT }
"/*" { BEGIN(S_COMMENTS) } "/*" currentState = S_COMMENTS
<S_COMMENTS>"*/" { BEGIN(0) } <S_COMMENTS>"*/" currentState = S_INITIAL
<S_COMMENTS>. { /* ignore chars within multi-line comments */ } <S_COMMENTS>.|\n /* ignore chars within multi-line comments */
\/\/[^\r\n]*\n { /* gobble up one-line comments */ } \/\/[^\r\n]*\n /* gobble up one-line comments */
ALERT|alert { return ALERT } ALERT|alert return ALERT
IF|if { return IF } IF|if return IF
FOR|for { return FOR } FOR|for return FOR
WITH|with { return WITH } WITH|with return WITH
PERMANENT|permanent { return PERMANENT } PERMANENT|permanent return PERMANENT
BY|by { return GROUP_OP } BY|by return GROUP_OP
AVG|SUM|MAX|MIN|COUNT { yylval.str = yytext; return AGGR_OP } AVG|SUM|MAX|MIN|COUNT lval.str = lexer.token(); return AGGR_OP
avg|sum|max|min|count { yylval.str = strings.ToUpper(yytext); return AGGR_OP } avg|sum|max|min|count lval.str = strings.ToUpper(lexer.token()); return AGGR_OP
\<|>|AND|OR|and|or { yylval.str = strings.ToUpper(yytext); return CMP_OP } \<|>|AND|OR|and|or lval.str = strings.ToUpper(lexer.token()); return CMP_OP
==|!=|>=|<= { yylval.str = yytext; return CMP_OP } ==|!=|>=|<= lval.str = lexer.token(); return CMP_OP
[+\-] { yylval.str = yytext; return ADDITIVE_OP } [+\-] lval.str = lexer.token(); return ADDITIVE_OP
[*/%] { yylval.str = yytext; return MULT_OP } [*/%] lval.str = lexer.token(); return MULT_OP
{D}+{U} { yylval.str = yytext; return DURATION } {D}+{U} lval.str = lexer.token(); return DURATION
{L}({L}|{D})* { yylval.str = yytext; return IDENTIFIER } {L}({L}|{D})* lval.str = lexer.token(); return IDENTIFIER
\-?{D}+(\.{D}*)? { num, err := strconv.ParseFloat(yytext, 64); \-?{D}+(\.{D}*)? num, err := strconv.ParseFloat(lexer.token(), 64);
if (err != nil && err.(*strconv.NumError).Err == strconv.ErrSyntax) { if (err != nil && err.(*strconv.NumError).Err == strconv.ErrSyntax) {
panic("Invalid float") panic("Invalid float")
} }
yylval.num = clientmodel.SampleValue(num) lval.num = clientmodel.SampleValue(num)
return NUMBER } return NUMBER
\"(\\.|[^\\"])*\" { yylval.str = yytext[1:len(yytext) - 1]; return STRING } \"(\\.|[^\\"])*\" lval.str = lexer.token()[1:len(lexer.token()) - 1]; return STRING
\'(\\.|[^\\'])*\' { yylval.str = yytext[1:len(yytext) - 1]; return STRING } \'(\\.|[^\\'])*\' lval.str = lexer.token()[1:len(lexer.token()) - 1]; return STRING
[{}\[\]()=,] { return int(yytext[0]) } [{}\[\]()=,] return int(lexer.buf[0])
. { /* don't print any remaining chars (whitespace) */ } [\t\n\r ] /* gobble up any whitespace */
\n { /* don't print any remaining chars (whitespace) */ }
%% %%
lexer.empty = true
return int(c)
}

File diff suppressed because it is too large Load diff

View file

@@ -14,69 +14,87 @@
package rules package rules
import ( import (
"bufio"
"errors" "errors"
"fmt" "fmt"
"github.com/prometheus/prometheus/rules/ast"
"io" "io"
"log"
"os" "os"
"strings" "strings"
"sync"
)
// GoLex sadly needs these global variables for storing temporary token/parsing information. "github.com/prometheus/prometheus/rules/ast"
var (
yylval *yySymType // For storing extra token information, like the contents of a string.
yyline int // Line number within the current file or buffer.
yypos int // Character position within the current line.
parseMutex sync.Mutex // Mutex protecting the parsing-related global state defined above.
) )
type RulesLexer struct { type RulesLexer struct {
errors []string // Errors encountered during parsing. // Errors encountered during parsing.
startToken int // Dummy token to simulate multiple start symbols (see below). errors []string
parsedRules []Rule // Parsed full rules. // Dummy token to simulate multiple start symbols (see below).
parsedExpr ast.Node // Parsed single expression. startToken int
} // Parsed full rules.
parsedRules []Rule
// Parsed single expression.
parsedExpr ast.Node
func (lexer *RulesLexer) Lex(lval *yySymType) int { // Current character.
yylval = lval current byte
// Current token buffer.
buf []byte
// Input text.
src *bufio.Reader
// Whether we have a current char.
empty bool
// We simulate multiple start symbols for closely-related grammars via dummy tokens. See // Current input line.
// http://www.gnu.org/software/bison/manual/html_node/Multiple-start_002dsymbols.html line int
// Reason: we want to be able to parse lists of named rules as well as single expressions. // Current character position within the current input line.
if lexer.startToken != 0 { pos int
startToken := lexer.startToken
lexer.startToken = 0
return startToken
}
tokenType := yylex()
return tokenType
} }
func (lexer *RulesLexer) Error(errorStr string) { func (lexer *RulesLexer) Error(errorStr string) {
err := fmt.Sprintf("Error parsing rules at line %v, char %v: %v", yyline, yypos, errorStr) err := fmt.Sprintf("Error parsing rules at line %v, char %v: %v", lexer.line, lexer.pos, errorStr)
lexer.errors = append(lexer.errors, err) lexer.errors = append(lexer.errors, err)
} }
func LoadFromReader(rulesReader io.Reader, singleExpr bool) (interface{}, error) { func (lexer *RulesLexer) getChar() byte {
parseMutex.Lock() if lexer.current != 0 {
defer parseMutex.Unlock() lexer.buf = append(lexer.buf, lexer.current)
}
lexer.current = 0
if b, err := lexer.src.ReadByte(); err == nil {
if b == '\n' {
lexer.line++
lexer.pos = 0
} else {
lexer.pos++
}
lexer.current = b
} else if err != io.EOF {
log.Fatal(err)
}
return lexer.current
}
yyin = rulesReader func (lexer *RulesLexer) token() string {
yypos = 1 return string(lexer.buf)
yyline = 1 }
yydata = ""
yytext = ""
func newRulesLexer(src io.Reader, singleExpr bool) *RulesLexer {
lexer := &RulesLexer{ lexer := &RulesLexer{
startToken: START_RULES, startToken: START_RULES,
src: bufio.NewReader(src),
pos: 1,
line: 1,
} }
if singleExpr { if singleExpr {
lexer.startToken = START_EXPRESSION lexer.startToken = START_EXPRESSION
} }
lexer.getChar()
return lexer
}
func LoadFromReader(rulesReader io.Reader, singleExpr bool) (interface{}, error) {
lexer := newRulesLexer(rulesReader, singleExpr)
ret := yyParse(lexer) ret := yyParse(lexer)
if ret != 0 && len(lexer.errors) == 0 { if ret != 0 && len(lexer.errors) == 0 {
lexer.Error("Unknown parser error") lexer.Error("Unknown parser error")

View file

@@ -14,10 +14,10 @@
%{ %{
package rules package rules
import ( import (
clientmodel "github.com/prometheus/client_golang/model" clientmodel "github.com/prometheus/client_golang/model"
import "github.com/prometheus/prometheus/rules/ast" "github.com/prometheus/prometheus/rules/ast"
) )
%} %}

View file

@@ -1,22 +1,25 @@
//line parser.y:15
package rules
//line parser.y:15
package rules
import __yyfmt__ "fmt" import __yyfmt__ "fmt"
//line parser.y:15 //line parser.y:15
import clientmodel "github.com/prometheus/client_golang/model"
import "github.com/prometheus/prometheus/rules/ast"
//line parser.y:21 import (
clientmodel "github.com/prometheus/client_golang/model"
"github.com/prometheus/prometheus/rules/ast"
)
//line parser.y:24
type yySymType struct { type yySymType struct {
yys int yys int
num clientmodel.SampleValue num clientmodel.SampleValue
str string str string
ruleNode ast.Node ruleNode ast.Node
ruleNodeSlice []ast.Node ruleNodeSlice []ast.Node
boolean bool boolean bool
labelNameSlice clientmodel.LabelNames labelNameSlice clientmodel.LabelNames
labelSet clientmodel.LabelSet labelSet clientmodel.LabelSet
} }
const START_RULES = 57346 const START_RULES = 57346
@@ -61,7 +64,8 @@ const yyEofCode = 1
const yyErrCode = 2 const yyErrCode = 2
const yyMaxDepth = 200 const yyMaxDepth = 200
//line parser.y:188 //line parser.y:191
//line yacctab:1 //line yacctab:1
var yyExca = []int{ var yyExca = []int{
@@ -394,208 +398,134 @@ yydefault:
switch yynt { switch yynt {
case 5: case 5:
//line parser.y:66 //line parser.y:69
{ { yylex.(*RulesLexer).parsedExpr = yyS[yypt-0].ruleNode }
yylex.(*RulesLexer).parsedExpr = yyS[yypt-0].ruleNode
}
case 6: case 6:
//line parser.y:70 //line parser.y:73
{ {
rule, err := CreateRecordingRule(yyS[yypt-3].str, yyS[yypt-2].labelSet, yyS[yypt-0].ruleNode, yyS[yypt-4].boolean) rule, err := CreateRecordingRule(yyS[yypt-3].str, yyS[yypt-2].labelSet, yyS[yypt-0].ruleNode, yyS[yypt-4].boolean)
if err != nil { if err != nil { yylex.Error(err.Error()); return 1 }
yylex.Error(err.Error()) yylex.(*RulesLexer).parsedRules = append(yylex.(*RulesLexer).parsedRules, rule)
return 1 }
}
yylex.(*RulesLexer).parsedRules = append(yylex.(*RulesLexer).parsedRules, rule)
}
case 7: case 7:
//line parser.y:76 //line parser.y:79
{ {
rule, err := CreateAlertingRule(yyS[yypt-5].str, yyS[yypt-3].ruleNode, yyS[yypt-2].str, yyS[yypt-0].labelSet) rule, err := CreateAlertingRule(yyS[yypt-5].str, yyS[yypt-3].ruleNode, yyS[yypt-2].str, yyS[yypt-0].labelSet)
if err != nil { if err != nil { yylex.Error(err.Error()); return 1 }
yylex.Error(err.Error()) yylex.(*RulesLexer).parsedRules = append(yylex.(*RulesLexer).parsedRules, rule)
return 1 }
}
yylex.(*RulesLexer).parsedRules = append(yylex.(*RulesLexer).parsedRules, rule)
}
case 8: case 8:
//line parser.y:84 //line parser.y:87
{ { yyVAL.str = "0s" }
yyVAL.str = "0s"
}
case 9: case 9:
//line parser.y:86 //line parser.y:89
{ { yyVAL.str = yyS[yypt-0].str }
yyVAL.str = yyS[yypt-0].str
}
case 10: case 10:
//line parser.y:90 //line parser.y:93
{ { yyVAL.boolean = false }
yyVAL.boolean = false
}
case 11: case 11:
//line parser.y:92 //line parser.y:95
{ { yyVAL.boolean = true }
yyVAL.boolean = true
}
case 12: case 12:
//line parser.y:96 //line parser.y:99
{ { yyVAL.labelSet = clientmodel.LabelSet{} }
yyVAL.labelSet = clientmodel.LabelSet{}
}
case 13: case 13:
//line parser.y:98 //line parser.y:101
{ { yyVAL.labelSet = yyS[yypt-1].labelSet }
yyVAL.labelSet = yyS[yypt-1].labelSet
}
case 14: case 14:
//line parser.y:100
{
yyVAL.labelSet = clientmodel.LabelSet{}
}
case 15:
//line parser.y:103 //line parser.y:103
{ { yyVAL.labelSet = clientmodel.LabelSet{} }
yyVAL.labelSet = yyS[yypt-0].labelSet case 15:
} //line parser.y:106
{ yyVAL.labelSet = yyS[yypt-0].labelSet }
case 16: case 16:
//line parser.y:105 //line parser.y:108
{ { for k, v := range yyS[yypt-0].labelSet { yyVAL.labelSet[k] = v } }
for k, v := range yyS[yypt-0].labelSet {
yyVAL.labelSet[k] = v
}
}
case 17: case 17:
//line parser.y:109 //line parser.y:112
{ { yyVAL.labelSet = clientmodel.LabelSet{ clientmodel.LabelName(yyS[yypt-2].str): clientmodel.LabelValue(yyS[yypt-0].str) } }
yyVAL.labelSet = clientmodel.LabelSet{clientmodel.LabelName(yyS[yypt-2].str): clientmodel.LabelValue(yyS[yypt-0].str)}
}
case 18: case 18:
//line parser.y:114 //line parser.y:117
{ { yyVAL.ruleNode = yyS[yypt-1].ruleNode }
yyVAL.ruleNode = yyS[yypt-1].ruleNode
}
case 19: case 19:
//line parser.y:116 //line parser.y:119
{ { yyS[yypt-0].labelSet[clientmodel.MetricNameLabel] = clientmodel.LabelValue(yyS[yypt-1].str); yyVAL.ruleNode = ast.NewVectorLiteral(yyS[yypt-0].labelSet) }
yyS[yypt-0].labelSet[clientmodel.MetricNameLabel] = clientmodel.LabelValue(yyS[yypt-1].str)
yyVAL.ruleNode = ast.NewVectorLiteral(yyS[yypt-0].labelSet)
}
case 20: case 20:
//line parser.y:118 //line parser.y:121
{ {
var err error var err error
yyVAL.ruleNode, err = NewFunctionCall(yyS[yypt-3].str, yyS[yypt-1].ruleNodeSlice) yyVAL.ruleNode, err = NewFunctionCall(yyS[yypt-3].str, yyS[yypt-1].ruleNodeSlice)
if err != nil { if err != nil { yylex.Error(err.Error()); return 1 }
yylex.Error(err.Error()) }
return 1
}
}
case 21: case 21:
//line parser.y:124 //line parser.y:127
{ {
var err error var err error
yyVAL.ruleNode, err = NewFunctionCall(yyS[yypt-2].str, []ast.Node{}) yyVAL.ruleNode, err = NewFunctionCall(yyS[yypt-2].str, []ast.Node{})
if err != nil { if err != nil { yylex.Error(err.Error()); return 1 }
yylex.Error(err.Error()) }
return 1
}
}
case 22: case 22:
//line parser.y:130 //line parser.y:133
{ {
var err error var err error
yyVAL.ruleNode, err = NewMatrix(yyS[yypt-3].ruleNode, yyS[yypt-1].str) yyVAL.ruleNode, err = NewMatrix(yyS[yypt-3].ruleNode, yyS[yypt-1].str)
if err != nil { if err != nil { yylex.Error(err.Error()); return 1 }
yylex.Error(err.Error()) }
return 1
}
}
case 23: case 23:
//line parser.y:136 //line parser.y:139
{ {
var err error var err error
yyVAL.ruleNode, err = NewVectorAggregation(yyS[yypt-4].str, yyS[yypt-2].ruleNode, yyS[yypt-0].labelNameSlice) yyVAL.ruleNode, err = NewVectorAggregation(yyS[yypt-4].str, yyS[yypt-2].ruleNode, yyS[yypt-0].labelNameSlice)
if err != nil { if err != nil { yylex.Error(err.Error()); return 1 }
yylex.Error(err.Error()) }
return 1
}
}
case 24: case 24:
//line parser.y:144 //line parser.y:147
{ {
var err error var err error
yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode) yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode)
if err != nil { if err != nil { yylex.Error(err.Error()); return 1 }
yylex.Error(err.Error()) }
return 1
}
}
case 25: case 25:
//line parser.y:150 //line parser.y:153
{ {
var err error var err error
yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode) yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode)
if err != nil { if err != nil { yylex.Error(err.Error()); return 1 }
yylex.Error(err.Error()) }
return 1
}
}
case 26: case 26:
//line parser.y:156 //line parser.y:159
{ {
var err error var err error
yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode) yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode)
if err != nil { if err != nil { yylex.Error(err.Error()); return 1 }
yylex.Error(err.Error()) }
return 1
}
}
case 27: case 27:
//line parser.y:162 //line parser.y:165
{ { yyVAL.ruleNode = ast.NewScalarLiteral(yyS[yypt-0].num)}
yyVAL.ruleNode = ast.NewScalarLiteral(yyS[yypt-0].num)
}
case 28: case 28:
//line parser.y:166 //line parser.y:169
{ { yyVAL.labelNameSlice = clientmodel.LabelNames{} }
yyVAL.labelNameSlice = clientmodel.LabelNames{}
}
case 29: case 29:
//line parser.y:168 //line parser.y:171
{ { yyVAL.labelNameSlice = yyS[yypt-1].labelNameSlice }
yyVAL.labelNameSlice = yyS[yypt-1].labelNameSlice
}
case 30: case 30:
//line parser.y:172 //line parser.y:175
{ { yyVAL.labelNameSlice = clientmodel.LabelNames{clientmodel.LabelName(yyS[yypt-0].str)} }
yyVAL.labelNameSlice = clientmodel.LabelNames{clientmodel.LabelName(yyS[yypt-0].str)}
}
case 31: case 31:
//line parser.y:174 //line parser.y:177
{ { yyVAL.labelNameSlice = append(yyVAL.labelNameSlice, clientmodel.LabelName(yyS[yypt-0].str)) }
yyVAL.labelNameSlice = append(yyVAL.labelNameSlice, clientmodel.LabelName(yyS[yypt-0].str))
}
case 32: case 32:
//line parser.y:178 //line parser.y:181
{ { yyVAL.ruleNodeSlice = []ast.Node{yyS[yypt-0].ruleNode} }
yyVAL.ruleNodeSlice = []ast.Node{yyS[yypt-0].ruleNode}
}
case 33: case 33:
//line parser.y:180 //line parser.y:183
{ { yyVAL.ruleNodeSlice = append(yyVAL.ruleNodeSlice, yyS[yypt-0].ruleNode) }
yyVAL.ruleNodeSlice = append(yyVAL.ruleNodeSlice, yyS[yypt-0].ruleNode)
}
case 34: case 34:
//line parser.y:184 //line parser.y:187
{ { yyVAL.ruleNode = yyS[yypt-0].ruleNode }
yyVAL.ruleNode = yyS[yypt-0].ruleNode
}
case 35: case 35:
//line parser.y:186 //line parser.y:189
{ { yyVAL.ruleNode = ast.NewStringLiteral(yyS[yypt-0].str) }
yyVAL.ruleNode = ast.NewStringLiteral(yyS[yypt-0].str)
}
} }
goto yystack /* stack new state and value */ goto yystack /* stack new state and value */
} }

View file

@@ -123,6 +123,17 @@ func TestExpressions(t *testing.T) {
}, },
fullRanges: 0, fullRanges: 0,
intervalRanges: 8, intervalRanges: 8,
}, {
expr: `
// Test comment.
SUM(http_requests) BY /* comments shouldn't
have any effect */ (job) // another comment`,
output: []string{
`http_requests{job="api-server"} => 1000 @[%v]`,
`http_requests{job="app-server"} => 2600 @[%v]`,
},
fullRanges: 0,
intervalRanges: 8,
}, { }, {
expr: `COUNT(http_requests) BY (job)`, expr: `COUNT(http_requests) BY (job)`,
output: []string{ output: []string{
@@ -448,7 +459,7 @@ var ruleTests = []struct {
{ {
inputFile: "syntax_error.rules", inputFile: "syntax_error.rules",
shouldFail: true, shouldFail: true,
errContains: "Error parsing rules at line 3", errContains: "Error parsing rules at line 5",
}, },
{ {
inputFile: "non_vector.rules", inputFile: "non_vector.rules",