Merge pull request #327 from prometheus/optimize/faster-lexer

Swap rules lexer for much faster one.
juliusv, 2013-07-11 11:08:20 -07:00
commit 8f0a3a060c
7 changed files with 1450 additions and 808 deletions

rules/Makefile:

@@ -17,7 +17,8 @@ parser.y.go: parser.y
 	go tool yacc -o parser.y.go -v "" parser.y
 
 lexer.l.go: parser.y.go lexer.l
-	golex lexer.l
+	# This is golex from https://github.com/cznic/golex.
+	golex -o="lexer.l.go" lexer.l
 
 clean:
 	rm lexer.l.go parser.y.go
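
Note that lexer.l.go now depends on parser.y.go, because golex needs the token constants that yacc emits. As a sketch only (not part of this change), the same order-sensitive two-step generation could be expressed with go:generate directives:

    package rules

    // Generation order matters: golex needs the token constants in parser.y.go.
    //go:generate go tool yacc -o parser.y.go -v "" parser.y
    //go:generate golex -o=lexer.l.go lexer.l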

rules/lexer.l:

@@ -1,25 +1,53 @@
-/* Copyright 2013 Prometheus Team
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License. */
+// Copyright 2013 Prometheus Team
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 %{
 package rules
 
 import (
+	"fmt"
 	"strconv"
 	"strings"
 
 	clientmodel "github.com/prometheus/client_golang/model"
 )
+
+// Lex is called by the parser generated by "go tool yacc" to obtain each
+// token. The method is opened before the matching rules block and closed at
+// the end of the file.
+func (lexer *RulesLexer) Lex(lval *yySymType) int {
+	// Internal lexer states.
+	const (
+		S_INITIAL = iota
+		S_COMMENTS
+	)
+
+	// We simulate multiple start symbols for closely-related grammars via dummy tokens. See
+	// http://www.gnu.org/software/bison/manual/html_node/Multiple-start_002dsymbols.html
+	// Reason: we want to be able to parse lists of named rules as well as single expressions.
+	if lexer.startToken != 0 {
+		startToken := lexer.startToken
+		lexer.startToken = 0
+		return startToken
+	}
+
+	c := lexer.current
+	currentState := 0
+
+	if lexer.empty {
+		c, lexer.empty = lexer.getChar(), false
+	}
 %}
 
 D [0-9]

@@ -28,44 +56,50 @@ U [smhdwy]
 %x S_COMMENTS
 
+%yyc c
+%yyn c = lexer.getChar()
+%yyt currentState
+
 %%
-.                      { yypos++; REJECT }
-\n                     { yyline++; yypos = 1; REJECT }
+	lexer.buf = lexer.buf[:0] // Code before the first rule is executed before every scan cycle (rule #0 / state 0 action).
 
-"/*"                   { BEGIN(S_COMMENTS) }
-<S_COMMENTS>"*/"       { BEGIN(0) }
-<S_COMMENTS>.          { /* ignore chars within multi-line comments */ }
+"/*"                   currentState = S_COMMENTS
+<S_COMMENTS>"*/"       currentState = S_INITIAL
+<S_COMMENTS>.|\n       /* ignore chars within multi-line comments */
 
-\/\/[^\r\n]*\n         { /* gobble up one-line comments */ }
+\/\/[^\r\n]*\n         /* gobble up one-line comments */
 
-ALERT|alert            { return ALERT }
-IF|if                  { return IF }
-FOR|for                { return FOR }
-WITH|with              { return WITH }
-PERMANENT|permanent    { return PERMANENT }
-BY|by                  { return GROUP_OP }
+ALERT|alert            return ALERT
+IF|if                  return IF
+FOR|for                return FOR
+WITH|with              return WITH
+PERMANENT|permanent    return PERMANENT
+BY|by                  return GROUP_OP
 
-AVG|SUM|MAX|MIN|COUNT  { yylval.str = yytext; return AGGR_OP }
-avg|sum|max|min|count  { yylval.str = strings.ToUpper(yytext); return AGGR_OP }
-\<|>|AND|OR|and|or     { yylval.str = strings.ToUpper(yytext); return CMP_OP }
-==|!=|>=|<=            { yylval.str = yytext; return CMP_OP }
-[+\-]                  { yylval.str = yytext; return ADDITIVE_OP }
-[*/%]                  { yylval.str = yytext; return MULT_OP }
+AVG|SUM|MAX|MIN|COUNT  lval.str = lexer.token(); return AGGR_OP
+avg|sum|max|min|count  lval.str = strings.ToUpper(lexer.token()); return AGGR_OP
+\<|>|AND|OR|and|or     lval.str = strings.ToUpper(lexer.token()); return CMP_OP
+==|!=|>=|<=            lval.str = lexer.token(); return CMP_OP
+[+\-]                  lval.str = lexer.token(); return ADDITIVE_OP
+[*/%]                  lval.str = lexer.token(); return MULT_OP
 
-{D}+{U}                { yylval.str = yytext; return DURATION }
-{L}({L}|{D})*          { yylval.str = yytext; return IDENTIFIER }
+{D}+{U}                lval.str = lexer.token(); return DURATION
+{L}({L}|{D})*          lval.str = lexer.token(); return IDENTIFIER
 
-\-?{D}+(\.{D}*)?       { num, err := strconv.ParseFloat(yytext, 64);
-                         if (err != nil && err.(*strconv.NumError).Err == strconv.ErrSyntax) {
-                           panic("Invalid float")
-                         }
-                         yylval.num = clientmodel.SampleValue(num)
-                         return NUMBER }
+\-?{D}+(\.{D}*)?       num, err := strconv.ParseFloat(lexer.token(), 64);
+                       if (err != nil && err.(*strconv.NumError).Err == strconv.ErrSyntax) {
+                         panic("Invalid float")
+                       }
+                       lval.num = clientmodel.SampleValue(num)
+                       return NUMBER
 
-\"(\\.|[^\\"])*\"      { yylval.str = yytext[1:len(yytext) - 1]; return STRING }
-\'(\\.|[^\\'])*\'      { yylval.str = yytext[1:len(yytext) - 1]; return STRING }
+\"(\\.|[^\\"])*\"      lval.str = lexer.token()[1:len(lexer.token()) - 1]; return STRING
+\'(\\.|[^\\'])*\'      lval.str = lexer.token()[1:len(lexer.token()) - 1]; return STRING
 
-[{}\[\]()=,]           { return int(yytext[0]) }
-.                      { /* don't print any remaining chars (whitespace) */ }
-\n                     { /* don't print any remaining chars (whitespace) */ }
+[{}\[\]()=,]           return int(lexer.buf[0])
+[\t\n\r ]              /* gobble up any whitespace */
 
 %%
+	lexer.empty = true
+	return int(c)
+}
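
The %yyc/%yyn/%yyt directives wire the generated scanner to the lexer's own state: c holds the current character, c = lexer.getChar() advances the input, and currentState selects the start condition (S_INITIAL vs. S_COMMENTS). The speed win comes from getChar banking scanned bytes into lexer.buf directly, so token text is a plain byte slice instead of global string state. A stand-alone sketch of that contract, with hypothetical names mirroring getChar/token from load.go below:

    package main

    import (
    	"bufio"
    	"fmt"
    	"strings"
    )

    // feed mimics RulesLexer.getChar/token: each advance first banks the
    // previous character into the token buffer, then pulls the next byte
    // (0 signals end of input).
    type feed struct {
    	src     *bufio.Reader
    	current byte
    	buf     []byte
    }

    func (f *feed) getChar() byte {
    	if f.current != 0 {
    		f.buf = append(f.buf, f.current)
    	}
    	f.current = 0
    	if b, err := f.src.ReadByte(); err == nil {
    		f.current = b
    	}
    	return f.current
    }

    func main() {
    	f := &feed{src: bufio.NewReader(strings.NewReader("avg"))}
    	for c := f.getChar(); c != 0; c = f.getChar() { // %yyn: c = lexer.getChar()
    		// The generated scanner inspects c here and decides where tokens end.
    	}
    	fmt.Println(string(f.buf)) // prints "avg", what lexer.token() would return
    }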

rules/lexer.l.go (generated):

File diff suppressed because it is too large.

rules/load.go:

@@ -14,69 +14,87 @@
 package rules
 
 import (
+	"bufio"
 	"errors"
 	"fmt"
-	"github.com/prometheus/prometheus/rules/ast"
 	"io"
+	"log"
 	"os"
 	"strings"
-	"sync"
-)
 
-// GoLex sadly needs these global variables for storing temporary token/parsing information.
-var (
-	yylval *yySymType // For storing extra token information, like the contents of a string.
-	yyline int        // Line number within the current file or buffer.
-	yypos  int        // Character position within the current line.
-
-	parseMutex sync.Mutex // Mutex protecting the parsing-related global state defined above.
+	"github.com/prometheus/prometheus/rules/ast"
 )
 
 type RulesLexer struct {
-	errors      []string // Errors encountered during parsing.
-	startToken  int      // Dummy token to simulate multiple start symbols (see below).
-	parsedRules []Rule   // Parsed full rules.
-	parsedExpr  ast.Node // Parsed single expression.
-}
-
-func (lexer *RulesLexer) Lex(lval *yySymType) int {
-	yylval = lval
-
-	// We simulate multiple start symbols for closely-related grammars via dummy tokens. See
-	// http://www.gnu.org/software/bison/manual/html_node/Multiple-start_002dsymbols.html
-	// Reason: we want to be able to parse lists of named rules as well as single expressions.
-	if lexer.startToken != 0 {
-		startToken := lexer.startToken
-		lexer.startToken = 0
-		return startToken
-	}
-
-	tokenType := yylex()
-	return tokenType
+	// Errors encountered during parsing.
+	errors []string
+	// Dummy token to simulate multiple start symbols (see below).
+	startToken int
+	// Parsed full rules.
+	parsedRules []Rule
+	// Parsed single expression.
+	parsedExpr ast.Node
+
+	// Current character.
+	current byte
+	// Current token buffer.
+	buf []byte
+	// Input text.
+	src *bufio.Reader
+	// Whether we have a current char.
+	empty bool
+
+	// Current input line.
+	line int
+	// Current character position within the current input line.
+	pos int
 }
 
 func (lexer *RulesLexer) Error(errorStr string) {
-	err := fmt.Sprintf("Error parsing rules at line %v, char %v: %v", yyline, yypos, errorStr)
+	err := fmt.Sprintf("Error parsing rules at line %v, char %v: %v", lexer.line, lexer.pos, errorStr)
 	lexer.errors = append(lexer.errors, err)
 }
 
-func LoadFromReader(rulesReader io.Reader, singleExpr bool) (interface{}, error) {
-	parseMutex.Lock()
-	defer parseMutex.Unlock()
-
-	yyin = rulesReader
-	yypos = 1
-	yyline = 1
-	yydata = ""
-	yytext = ""
-
+func (lexer *RulesLexer) getChar() byte {
+	if lexer.current != 0 {
+		lexer.buf = append(lexer.buf, lexer.current)
+	}
+	lexer.current = 0
+	if b, err := lexer.src.ReadByte(); err == nil {
+		if b == '\n' {
+			lexer.line++
+			lexer.pos = 0
+		} else {
+			lexer.pos++
+		}
+		lexer.current = b
+	} else if err != io.EOF {
+		log.Fatal(err)
+	}
+	return lexer.current
+}
+
+func (lexer *RulesLexer) token() string {
+	return string(lexer.buf)
+}
+
+func newRulesLexer(src io.Reader, singleExpr bool) *RulesLexer {
 	lexer := &RulesLexer{
 		startToken: START_RULES,
+		src:        bufio.NewReader(src),
+		pos:        1,
+		line:       1,
 	}
 
 	if singleExpr {
 		lexer.startToken = START_EXPRESSION
 	}
+	lexer.getChar()
+
+	return lexer
+}
+
+func LoadFromReader(rulesReader io.Reader, singleExpr bool) (interface{}, error) {
+	lexer := newRulesLexer(rulesReader, singleExpr)
 	ret := yyParse(lexer)
 	if ret != 0 && len(lexer.errors) == 0 {
 		lexer.Error("Unknown parser error")
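
With all lexer state moved into the per-call RulesLexer, the old GoLex globals and parseMutex are gone, so nothing prevents concurrent parses. A usage sketch (a hypothetical fragment inside package rules, where strings and ast are already imported; per parsedExpr above, singleExpr == true yields the parsed expression):

    // parseExpr parses one expression with a fresh, self-contained lexer.
    func parseExpr(input string) (ast.Node, error) {
    	node, err := LoadFromReader(strings.NewReader(input), true)
    	if err != nil {
    		return nil, err
    	}
    	return node.(ast.Node), nil
    }

For example, parseExpr(`SUM(http_requests) BY (job)`) returns the AST for the aggregation exercised in the tests below.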

rules/parser.y:

@@ -14,10 +14,10 @@
 %{
 package rules
 
 import (
 	clientmodel "github.com/prometheus/client_golang/model"
-	import "github.com/prometheus/prometheus/rules/ast"
+	"github.com/prometheus/prometheus/rules/ast"
 )
 %}

rules/parser.y.go:

@@ -1,22 +1,25 @@
 //line parser.y:15
 package rules
 
 import __yyfmt__ "fmt"
 
 //line parser.y:15
-import clientmodel "github.com/prometheus/client_golang/model"
-import "github.com/prometheus/prometheus/rules/ast"
+import (
+	clientmodel "github.com/prometheus/client_golang/model"
+	"github.com/prometheus/prometheus/rules/ast"
+)
 
-//line parser.y:21
+//line parser.y:24
 type yySymType struct {
 	yys int
 	num clientmodel.SampleValue
 	str string
 	ruleNode ast.Node
 	ruleNodeSlice []ast.Node
 	boolean bool
 	labelNameSlice clientmodel.LabelNames
 	labelSet clientmodel.LabelSet
 }
 
 const START_RULES = 57346

@@ -61,7 +64,8 @@ const yyEofCode = 1
 const yyErrCode = 2
 const yyMaxDepth = 200
 
-//line parser.y:188
+//line parser.y:191
+
 //line yacctab:1
 
 var yyExca = []int{
@@ -394,208 +398,134 @@ yydefault:
 	switch yynt {
 
 	case 5:
-		//line parser.y:66
-		{
-			yylex.(*RulesLexer).parsedExpr = yyS[yypt-0].ruleNode
-		}
+		//line parser.y:69
+		{ yylex.(*RulesLexer).parsedExpr = yyS[yypt-0].ruleNode }
 	case 6:
-		//line parser.y:70
+		//line parser.y:73
 		{
 			rule, err := CreateRecordingRule(yyS[yypt-3].str, yyS[yypt-2].labelSet, yyS[yypt-0].ruleNode, yyS[yypt-4].boolean)
-			if err != nil {
-				yylex.Error(err.Error())
-				return 1
-			}
+			if err != nil { yylex.Error(err.Error()); return 1 }
 			yylex.(*RulesLexer).parsedRules = append(yylex.(*RulesLexer).parsedRules, rule)
 		}
 	case 7:
-		//line parser.y:76
+		//line parser.y:79
 		{
 			rule, err := CreateAlertingRule(yyS[yypt-5].str, yyS[yypt-3].ruleNode, yyS[yypt-2].str, yyS[yypt-0].labelSet)
-			if err != nil {
-				yylex.Error(err.Error())
-				return 1
-			}
+			if err != nil { yylex.Error(err.Error()); return 1 }
 			yylex.(*RulesLexer).parsedRules = append(yylex.(*RulesLexer).parsedRules, rule)
 		}
 	case 8:
-		//line parser.y:84
-		{
-			yyVAL.str = "0s"
-		}
+		//line parser.y:87
+		{ yyVAL.str = "0s" }
 	case 9:
-		//line parser.y:86
-		{
-			yyVAL.str = yyS[yypt-0].str
-		}
+		//line parser.y:89
+		{ yyVAL.str = yyS[yypt-0].str }
 	case 10:
-		//line parser.y:90
-		{
-			yyVAL.boolean = false
-		}
+		//line parser.y:93
+		{ yyVAL.boolean = false }
 	case 11:
-		//line parser.y:92
-		{
-			yyVAL.boolean = true
-		}
+		//line parser.y:95
+		{ yyVAL.boolean = true }
 	case 12:
-		//line parser.y:96
-		{
-			yyVAL.labelSet = clientmodel.LabelSet{}
-		}
+		//line parser.y:99
+		{ yyVAL.labelSet = clientmodel.LabelSet{} }
 	case 13:
-		//line parser.y:98
-		{
-			yyVAL.labelSet = yyS[yypt-1].labelSet
-		}
+		//line parser.y:101
+		{ yyVAL.labelSet = yyS[yypt-1].labelSet }
 	case 14:
-		//line parser.y:100
-		{
-			yyVAL.labelSet = clientmodel.LabelSet{}
-		}
+		//line parser.y:103
+		{ yyVAL.labelSet = clientmodel.LabelSet{} }
 	case 15:
-		//line parser.y:103
-		{
-			yyVAL.labelSet = yyS[yypt-0].labelSet
-		}
+		//line parser.y:106
+		{ yyVAL.labelSet = yyS[yypt-0].labelSet }
 	case 16:
-		//line parser.y:105
-		{
-			for k, v := range yyS[yypt-0].labelSet {
-				yyVAL.labelSet[k] = v
-			}
-		}
+		//line parser.y:108
+		{ for k, v := range yyS[yypt-0].labelSet { yyVAL.labelSet[k] = v } }
 	case 17:
-		//line parser.y:109
-		{
-			yyVAL.labelSet = clientmodel.LabelSet{clientmodel.LabelName(yyS[yypt-2].str): clientmodel.LabelValue(yyS[yypt-0].str)}
-		}
+		//line parser.y:112
+		{ yyVAL.labelSet = clientmodel.LabelSet{ clientmodel.LabelName(yyS[yypt-2].str): clientmodel.LabelValue(yyS[yypt-0].str) } }
 	case 18:
-		//line parser.y:114
-		{
-			yyVAL.ruleNode = yyS[yypt-1].ruleNode
-		}
+		//line parser.y:117
+		{ yyVAL.ruleNode = yyS[yypt-1].ruleNode }
 	case 19:
-		//line parser.y:116
-		{
-			yyS[yypt-0].labelSet[clientmodel.MetricNameLabel] = clientmodel.LabelValue(yyS[yypt-1].str)
-			yyVAL.ruleNode = ast.NewVectorLiteral(yyS[yypt-0].labelSet)
-		}
+		//line parser.y:119
+		{ yyS[yypt-0].labelSet[clientmodel.MetricNameLabel] = clientmodel.LabelValue(yyS[yypt-1].str); yyVAL.ruleNode = ast.NewVectorLiteral(yyS[yypt-0].labelSet) }
 	case 20:
-		//line parser.y:118
+		//line parser.y:121
 		{
 			var err error
 			yyVAL.ruleNode, err = NewFunctionCall(yyS[yypt-3].str, yyS[yypt-1].ruleNodeSlice)
-			if err != nil {
-				yylex.Error(err.Error())
-				return 1
-			}
+			if err != nil { yylex.Error(err.Error()); return 1 }
 		}
 	case 21:
-		//line parser.y:124
+		//line parser.y:127
 		{
 			var err error
 			yyVAL.ruleNode, err = NewFunctionCall(yyS[yypt-2].str, []ast.Node{})
-			if err != nil {
-				yylex.Error(err.Error())
-				return 1
-			}
+			if err != nil { yylex.Error(err.Error()); return 1 }
 		}
 	case 22:
-		//line parser.y:130
+		//line parser.y:133
 		{
 			var err error
 			yyVAL.ruleNode, err = NewMatrix(yyS[yypt-3].ruleNode, yyS[yypt-1].str)
-			if err != nil {
-				yylex.Error(err.Error())
-				return 1
-			}
+			if err != nil { yylex.Error(err.Error()); return 1 }
 		}
 	case 23:
-		//line parser.y:136
+		//line parser.y:139
 		{
 			var err error
 			yyVAL.ruleNode, err = NewVectorAggregation(yyS[yypt-4].str, yyS[yypt-2].ruleNode, yyS[yypt-0].labelNameSlice)
-			if err != nil {
-				yylex.Error(err.Error())
-				return 1
-			}
+			if err != nil { yylex.Error(err.Error()); return 1 }
 		}
 	case 24:
-		//line parser.y:144
+		//line parser.y:147
 		{
 			var err error
 			yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode)
-			if err != nil {
-				yylex.Error(err.Error())
-				return 1
-			}
+			if err != nil { yylex.Error(err.Error()); return 1 }
 		}
 	case 25:
-		//line parser.y:150
+		//line parser.y:153
 		{
 			var err error
 			yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode)
-			if err != nil {
-				yylex.Error(err.Error())
-				return 1
-			}
+			if err != nil { yylex.Error(err.Error()); return 1 }
 		}
 	case 26:
-		//line parser.y:156
+		//line parser.y:159
 		{
 			var err error
 			yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode)
-			if err != nil {
-				yylex.Error(err.Error())
-				return 1
-			}
+			if err != nil { yylex.Error(err.Error()); return 1 }
 		}
 	case 27:
-		//line parser.y:162
-		{
-			yyVAL.ruleNode = ast.NewScalarLiteral(yyS[yypt-0].num)
-		}
+		//line parser.y:165
+		{ yyVAL.ruleNode = ast.NewScalarLiteral(yyS[yypt-0].num)}
 	case 28:
-		//line parser.y:166
-		{
-			yyVAL.labelNameSlice = clientmodel.LabelNames{}
-		}
+		//line parser.y:169
+		{ yyVAL.labelNameSlice = clientmodel.LabelNames{} }
 	case 29:
-		//line parser.y:168
-		{
-			yyVAL.labelNameSlice = yyS[yypt-1].labelNameSlice
-		}
+		//line parser.y:171
+		{ yyVAL.labelNameSlice = yyS[yypt-1].labelNameSlice }
 	case 30:
-		//line parser.y:172
-		{
-			yyVAL.labelNameSlice = clientmodel.LabelNames{clientmodel.LabelName(yyS[yypt-0].str)}
-		}
+		//line parser.y:175
+		{ yyVAL.labelNameSlice = clientmodel.LabelNames{clientmodel.LabelName(yyS[yypt-0].str)} }
 	case 31:
-		//line parser.y:174
-		{
-			yyVAL.labelNameSlice = append(yyVAL.labelNameSlice, clientmodel.LabelName(yyS[yypt-0].str))
-		}
+		//line parser.y:177
+		{ yyVAL.labelNameSlice = append(yyVAL.labelNameSlice, clientmodel.LabelName(yyS[yypt-0].str)) }
 	case 32:
-		//line parser.y:178
-		{
-			yyVAL.ruleNodeSlice = []ast.Node{yyS[yypt-0].ruleNode}
-		}
+		//line parser.y:181
+		{ yyVAL.ruleNodeSlice = []ast.Node{yyS[yypt-0].ruleNode} }
 	case 33:
-		//line parser.y:180
-		{
-			yyVAL.ruleNodeSlice = append(yyVAL.ruleNodeSlice, yyS[yypt-0].ruleNode)
-		}
+		//line parser.y:183
+		{ yyVAL.ruleNodeSlice = append(yyVAL.ruleNodeSlice, yyS[yypt-0].ruleNode) }
 	case 34:
-		//line parser.y:184
-		{
-			yyVAL.ruleNode = yyS[yypt-0].ruleNode
-		}
+		//line parser.y:187
+		{ yyVAL.ruleNode = yyS[yypt-0].ruleNode }
 	case 35:
-		//line parser.y:186
-		{
-			yyVAL.ruleNode = ast.NewStringLiteral(yyS[yypt-0].str)
-		}
+		//line parser.y:189
+		{ yyVAL.ruleNode = ast.NewStringLiteral(yyS[yypt-0].str) }
 	}
 	goto yystack /* stack new state and value */
 }
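
Most of the churn in this generated file is the //line pragmas shifting by three: they map positions in parser.y.go back to parser.y, so panics and error reports point at the grammar rather than the generated output. A tiny stand-alone demonstration of the pragma (hypothetical file, not part of this change):

    package main

    import (
    	"fmt"
    	"runtime"
    )

    func main() {
    //line parser.y:69
    	_, file, line, _ := runtime.Caller(0) // the pragma rewrites this position
    	fmt.Println(file, line)               // prints "parser.y 69", not this file's name
    }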

rules/rules_test.go:

@@ -123,6 +123,17 @@ func TestExpressions(t *testing.T) {
 		},
 		fullRanges:     0,
 		intervalRanges: 8,
+	}, {
+		expr: `
+			// Test comment.
+			SUM(http_requests) BY /* comments shouldn't
+			have any effect */ (job) // another comment`,
+		output: []string{
+			`http_requests{job="api-server"} => 1000 @[%v]`,
+			`http_requests{job="app-server"} => 2600 @[%v]`,
+		},
+		fullRanges:     0,
+		intervalRanges: 8,
 	}, {
 		expr: `COUNT(http_requests) BY (job)`,
 		output: []string{

@@ -448,7 +459,7 @@ var ruleTests = []struct {
 	{
 		inputFile:   "syntax_error.rules",
 		shouldFail:  true,
-		errContains: "Error parsing rules at line 3",
+		errContains: "Error parsing rules at line 5",
 	},
 	{
 		inputFile: "non_vector.rules",
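
The expected error line for syntax_error.rules moves from 3 to 5, presumably because the new lexer's bookkeeping differs from the old yyline/yypos REJECT rules: getChar advances lexer.line on every newline it reads from the input. A stand-alone sketch of that counting rule:

    package main

    import "fmt"

    // Mirrors getChar's newline handling: line starts at 1 and advances on
    // every '\n' pulled from the input stream.
    func main() {
    	line, pos := 1, 1
    	for _, b := range []byte("a = 1\n\n// comment\nb ==\n= 2\n") {
    		if b == '\n' {
    			line++
    			pos = 0
    		} else {
    			pos++
    		}
    	}
    	fmt.Println(line, pos) // prints "6 0"
    }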