Merge pull request #327 from prometheus/optimize/faster-lexer

Swap rules lexer for a much faster one.
juliusv 2013-07-11 11:08:20 -07:00
commit 8f0a3a060c
7 changed files with 1450 additions and 808 deletions
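For orientation: the parser generated by "go tool yacc" drives any value that
satisfies the small lexer interface it emits into parser.y.go. The old lexer
funneled every parse through package-global state guarded by a mutex; the new
one keeps all scanning state inside a per-parse RulesLexer, which is what
removes the lock and makes the lexer faster. A minimal sketch of that
contract, assuming the standard goyacc output:

// Sketch of the interface "go tool yacc" generates; yyParse calls Lex
// once per token and Error on syntax errors. Because each parse below
// constructs its own *RulesLexer, no global mutex is needed anymore.
type yyLexer interface {
	Lex(lval *yySymType) int // fill lval and return the next token code
	Error(s string)          // record a parse error
}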


@@ -17,7 +17,8 @@ parser.y.go: parser.y
go tool yacc -o parser.y.go -v "" parser.y
lexer.l.go: parser.y.go lexer.l
golex lexer.l
# This is golex from https://github.com/cznic/golex.
golex -o="lexer.l.go" lexer.l
clean:
rm lexer.l.go parser.y.go
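The new dependency chain matters: lexer.l.go must be generated after
parser.y.go, because the lexer's actions return token constants (ALERT,
NUMBER, START_RULES, ...) that yacc emits into parser.y.go. A hypothetical
go:generate rendering of the same two steps, shown only to clarify the
ordering (the repository itself drives this through make):

// Hypothetical: the same pipeline as go:generate directives; go generate
// runs them in file order, preserving the parser-before-lexer dependency.
//go:generate go tool yacc -o parser.y.go -v "" parser.y
//go:generate golex -o=lexer.l.go lexer.l
package rules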


@@ -1,25 +1,53 @@
/* Copyright 2013 Prometheus Team
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
// Copyright 2013 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
%{
package rules
import (
"fmt"
"strconv"
"strings"
clientmodel "github.com/prometheus/client_golang/model"
)
// Lex is called by the parser generated by "go tool yacc" to obtain each
// token. The method is opened before the matching rules block and closed at
// the end of the file.
func (lexer *RulesLexer) Lex(lval *yySymType) int {
// Internal lexer states.
const (
S_INITIAL = iota
S_COMMENTS
)
// We simulate multiple start symbols for closely-related grammars via dummy tokens. See
// http://www.gnu.org/software/bison/manual/html_node/Multiple-start_002dsymbols.html
// Reason: we want to be able to parse lists of named rules as well as single expressions.
if lexer.startToken != 0 {
startToken := lexer.startToken
lexer.startToken = 0
return startToken
}
c := lexer.current
currentState := 0
if lexer.empty {
c, lexer.empty = lexer.getChar(), false
}
%}
D [0-9]
@@ -28,44 +56,50 @@ U [smhdwy]
%x S_COMMENTS
%yyc c
%yyn c = lexer.getChar()
%yyt currentState
%%
. { yypos++; REJECT }
\n { yyline++; yypos = 1; REJECT }
lexer.buf = lexer.buf[:0] // The code before the first rule is executed before every scan cycle (rule #0 / state 0 action)
"/*" { BEGIN(S_COMMENTS) }
<S_COMMENTS>"*/" { BEGIN(0) }
<S_COMMENTS>. { /* ignore chars within multi-line comments */ }
"/*" currentState = S_COMMENTS
<S_COMMENTS>"*/" currentState = S_INITIAL
<S_COMMENTS>.|\n /* ignore chars within multi-line comments */
\/\/[^\r\n]*\n { /* gobble up one-line comments */ }
\/\/[^\r\n]*\n /* gobble up one-line comments */
ALERT|alert { return ALERT }
IF|if { return IF }
FOR|for { return FOR }
WITH|with { return WITH }
ALERT|alert return ALERT
IF|if return IF
FOR|for return FOR
WITH|with return WITH
PERMANENT|permanent { return PERMANENT }
BY|by { return GROUP_OP }
AVG|SUM|MAX|MIN|COUNT { yylval.str = yytext; return AGGR_OP }
avg|sum|max|min|count { yylval.str = strings.ToUpper(yytext); return AGGR_OP }
\<|>|AND|OR|and|or { yylval.str = strings.ToUpper(yytext); return CMP_OP }
==|!=|>=|<= { yylval.str = yytext; return CMP_OP }
[+\-] { yylval.str = yytext; return ADDITIVE_OP }
[*/%] { yylval.str = yytext; return MULT_OP }
PERMANENT|permanent return PERMANENT
BY|by return GROUP_OP
AVG|SUM|MAX|MIN|COUNT lval.str = lexer.token(); return AGGR_OP
avg|sum|max|min|count lval.str = strings.ToUpper(lexer.token()); return AGGR_OP
\<|>|AND|OR|and|or lval.str = strings.ToUpper(lexer.token()); return CMP_OP
==|!=|>=|<= lval.str = lexer.token(); return CMP_OP
[+\-] lval.str = lexer.token(); return ADDITIVE_OP
[*/%] lval.str = lexer.token(); return MULT_OP
{D}+{U} { yylval.str = yytext; return DURATION }
{L}({L}|{D})* { yylval.str = yytext; return IDENTIFIER }
{D}+{U} lval.str = lexer.token(); return DURATION
{L}({L}|{D})* lval.str = lexer.token(); return IDENTIFIER
\-?{D}+(\.{D}*)? { num, err := strconv.ParseFloat(yytext, 64);
if (err != nil && err.(*strconv.NumError).Err == strconv.ErrSyntax) {
panic("Invalid float")
}
yylval.num = clientmodel.SampleValue(num)
return NUMBER }
\-?{D}+(\.{D}*)? num, err := strconv.ParseFloat(lexer.token(), 64);
if (err != nil && err.(*strconv.NumError).Err == strconv.ErrSyntax) {
panic("Invalid float")
}
lval.num = clientmodel.SampleValue(num)
return NUMBER
\"(\\.|[^\\"])*\" { yylval.str = yytext[1:len(yytext) - 1]; return STRING }
\'(\\.|[^\\'])*\' { yylval.str = yytext[1:len(yytext) - 1]; return STRING }
\"(\\.|[^\\"])*\" lval.str = lexer.token()[1:len(lexer.token()) - 1]; return STRING
\'(\\.|[^\\'])*\' lval.str = lexer.token()[1:len(lexer.token()) - 1]; return STRING
[{}\[\]()=,] { return int(yytext[0]) }
. { /* don't print any remaining chars (whitespace) */ }
\n { /* don't print any remaining chars (whitespace) */ }
[{}\[\]()=,] return int(lexer.buf[0])
[\t\n\r ] /* gobble up any whitespace */
%%
lexer.empty = true
return int(c)
}
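A note on the dummy start-token trick used in the prologue above: a yacc
grammar has a single start symbol, so to parse either a whole rules file or a
single expression with one grammar, the lexer's first Lex call returns a
synthetic token (START_RULES or START_EXPRESSION) that steers the parser into
the matching sub-grammar. A self-contained toy sketch of the pattern (all
names here are illustrative, not Prometheus's):

package main

import "fmt"

// The lexer hands out a synthetic token first, exactly once, before
// scanning real input.
const (
	tokStartRules = iota + 1
	tokStartExpression
	tokWord
)

type toyLexer struct {
	startToken int
	words      []string
}

func (l *toyLexer) lex() int {
	if l.startToken != 0 {
		t := l.startToken
		l.startToken = 0 // the dummy token is emitted only once
		return t
	}
	if len(l.words) == 0 {
		return 0 // EOF
	}
	l.words = l.words[1:]
	return tokWord
}

func main() {
	l := &toyLexer{startToken: tokStartExpression, words: []string{"x", "+", "y"}}
	for t := l.lex(); t != 0; t = l.lex() {
		fmt.Println("token:", t) // prints tokStartExpression first, then words
	}
}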

File diff suppressed because it is too large


@@ -14,69 +14,87 @@
package rules
import (
"bufio"
"errors"
"fmt"
"github.com/prometheus/prometheus/rules/ast"
"io"
"log"
"os"
"strings"
"sync"
)
// GoLex sadly needs these global variables for storing temporary token/parsing information.
var (
yylval *yySymType // For storing extra token information, like the contents of a string.
yyline int // Line number within the current file or buffer.
yypos int // Character position within the current line.
parseMutex sync.Mutex // Mutex protecting the parsing-related global state defined above.
"github.com/prometheus/prometheus/rules/ast"
)
type RulesLexer struct {
errors []string // Errors encountered during parsing.
startToken int // Dummy token to simulate multiple start symbols (see below).
parsedRules []Rule // Parsed full rules.
parsedExpr ast.Node // Parsed single expression.
}
// Errors encountered during parsing.
errors []string
// Dummy token to simulate multiple start symbols (see below).
startToken int
// Parsed full rules.
parsedRules []Rule
// Parsed single expression.
parsedExpr ast.Node
func (lexer *RulesLexer) Lex(lval *yySymType) int {
yylval = lval
// Current character.
current byte
// Current token buffer.
buf []byte
// Input text.
src *bufio.Reader
// Whether we have a current char.
empty bool
// We simulate multiple start symbols for closely-related grammars via dummy tokens. See
// http://www.gnu.org/software/bison/manual/html_node/Multiple-start_002dsymbols.html
// Reason: we want to be able to parse lists of named rules as well as single expressions.
if lexer.startToken != 0 {
startToken := lexer.startToken
lexer.startToken = 0
return startToken
}
tokenType := yylex()
return tokenType
// Current input line.
line int
// Current character position within the current input line.
pos int
}
func (lexer *RulesLexer) Error(errorStr string) {
err := fmt.Sprintf("Error parsing rules at line %v, char %v: %v", yyline, yypos, errorStr)
err := fmt.Sprintf("Error parsing rules at line %v, char %v: %v", lexer.line, lexer.pos, errorStr)
lexer.errors = append(lexer.errors, err)
}
func LoadFromReader(rulesReader io.Reader, singleExpr bool) (interface{}, error) {
parseMutex.Lock()
defer parseMutex.Unlock()
func (lexer *RulesLexer) getChar() byte {
if lexer.current != 0 {
lexer.buf = append(lexer.buf, lexer.current)
}
lexer.current = 0
if b, err := lexer.src.ReadByte(); err == nil {
if b == '\n' {
lexer.line++
lexer.pos = 0
} else {
lexer.pos++
}
lexer.current = b
} else if err != io.EOF {
log.Fatal(err)
}
return lexer.current
}
yyin = rulesReader
yypos = 1
yyline = 1
yydata = ""
yytext = ""
func (lexer *RulesLexer) token() string {
return string(lexer.buf)
}
func newRulesLexer(src io.Reader, singleExpr bool) *RulesLexer {
lexer := &RulesLexer{
startToken: START_RULES,
src: bufio.NewReader(src),
pos: 1,
line: 1,
}
if singleExpr {
lexer.startToken = START_EXPRESSION
}
lexer.getChar()
return lexer
}
func LoadFromReader(rulesReader io.Reader, singleExpr bool) (interface{}, error) {
lexer := newRulesLexer(rulesReader, singleExpr)
ret := yyParse(lexer)
if ret != 0 && len(lexer.errors) == 0 {
lexer.Error("Unknown parser error")
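The hunk is truncated here. For completeness, a hedged usage sketch of the
new entry point: that LoadFromReader returns the parsed ast.Node when
singleExpr is true is inferred from the parsedExpr field above, not shown in
the diff.

// Hypothetical helper: parse a single expression string through the
// new reentrant lexer. The type assertion on the interface{} result
// is an assumption based on the parsedExpr field.
func parseExpression(input string) (ast.Node, error) {
	res, err := LoadFromReader(strings.NewReader(input), true)
	if err != nil {
		return nil, err
	}
	expr, ok := res.(ast.Node)
	if !ok {
		return nil, fmt.Errorf("unexpected parse result of type %T", res)
	}
	return expr, nil
}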


@@ -14,10 +14,10 @@
%{
package rules
import (
clientmodel "github.com/prometheus/client_golang/model"
import (
clientmodel "github.com/prometheus/client_golang/model"
import "github.com/prometheus/prometheus/rules/ast"
"github.com/prometheus/prometheus/rules/ast"
)
%}


@@ -1,22 +1,25 @@
//line parser.y:15
package rules
//line parser.y:15
package rules
import __yyfmt__ "fmt"
//line parser.y:15
import clientmodel "github.com/prometheus/client_golang/model"
import "github.com/prometheus/prometheus/rules/ast"
import (
clientmodel "github.com/prometheus/client_golang/model"
//line parser.y:21
"github.com/prometheus/prometheus/rules/ast"
)
//line parser.y:24
type yySymType struct {
yys int
num clientmodel.SampleValue
str string
ruleNode ast.Node
ruleNodeSlice []ast.Node
boolean bool
labelNameSlice clientmodel.LabelNames
labelSet clientmodel.LabelSet
yys int
num clientmodel.SampleValue
str string
ruleNode ast.Node
ruleNodeSlice []ast.Node
boolean bool
labelNameSlice clientmodel.LabelNames
labelSet clientmodel.LabelSet
}
const START_RULES = 57346
@@ -61,7 +64,8 @@ const yyEofCode = 1
const yyErrCode = 2
const yyMaxDepth = 200
//line parser.y:188
//line parser.y:191
//line yacctab:1
var yyExca = []int{
@@ -394,208 +398,134 @@ yydefault:
switch yynt {
case 5:
//line parser.y:66
{
yylex.(*RulesLexer).parsedExpr = yyS[yypt-0].ruleNode
}
//line parser.y:69
{ yylex.(*RulesLexer).parsedExpr = yyS[yypt-0].ruleNode }
case 6:
//line parser.y:70
//line parser.y:73
{
rule, err := CreateRecordingRule(yyS[yypt-3].str, yyS[yypt-2].labelSet, yyS[yypt-0].ruleNode, yyS[yypt-4].boolean)
if err != nil {
yylex.Error(err.Error())
return 1
}
yylex.(*RulesLexer).parsedRules = append(yylex.(*RulesLexer).parsedRules, rule)
}
rule, err := CreateRecordingRule(yyS[yypt-3].str, yyS[yypt-2].labelSet, yyS[yypt-0].ruleNode, yyS[yypt-4].boolean)
if err != nil { yylex.Error(err.Error()); return 1 }
yylex.(*RulesLexer).parsedRules = append(yylex.(*RulesLexer).parsedRules, rule)
}
case 7:
//line parser.y:76
//line parser.y:79
{
rule, err := CreateAlertingRule(yyS[yypt-5].str, yyS[yypt-3].ruleNode, yyS[yypt-2].str, yyS[yypt-0].labelSet)
if err != nil {
yylex.Error(err.Error())
return 1
}
yylex.(*RulesLexer).parsedRules = append(yylex.(*RulesLexer).parsedRules, rule)
}
rule, err := CreateAlertingRule(yyS[yypt-5].str, yyS[yypt-3].ruleNode, yyS[yypt-2].str, yyS[yypt-0].labelSet)
if err != nil { yylex.Error(err.Error()); return 1 }
yylex.(*RulesLexer).parsedRules = append(yylex.(*RulesLexer).parsedRules, rule)
}
case 8:
//line parser.y:84
{
yyVAL.str = "0s"
}
//line parser.y:87
{ yyVAL.str = "0s" }
case 9:
//line parser.y:86
{
yyVAL.str = yyS[yypt-0].str
}
//line parser.y:89
{ yyVAL.str = yyS[yypt-0].str }
case 10:
//line parser.y:90
{
yyVAL.boolean = false
}
//line parser.y:93
{ yyVAL.boolean = false }
case 11:
//line parser.y:92
{
yyVAL.boolean = true
}
//line parser.y:95
{ yyVAL.boolean = true }
case 12:
//line parser.y:96
{
yyVAL.labelSet = clientmodel.LabelSet{}
}
//line parser.y:99
{ yyVAL.labelSet = clientmodel.LabelSet{} }
case 13:
//line parser.y:98
{
yyVAL.labelSet = yyS[yypt-1].labelSet
}
//line parser.y:101
{ yyVAL.labelSet = yyS[yypt-1].labelSet }
case 14:
//line parser.y:100
{
yyVAL.labelSet = clientmodel.LabelSet{}
}
case 15:
//line parser.y:103
{
yyVAL.labelSet = yyS[yypt-0].labelSet
}
{ yyVAL.labelSet = clientmodel.LabelSet{} }
case 15:
//line parser.y:106
{ yyVAL.labelSet = yyS[yypt-0].labelSet }
case 16:
//line parser.y:105
{
for k, v := range yyS[yypt-0].labelSet {
yyVAL.labelSet[k] = v
}
}
//line parser.y:108
{ for k, v := range yyS[yypt-0].labelSet { yyVAL.labelSet[k] = v } }
case 17:
//line parser.y:109
{
yyVAL.labelSet = clientmodel.LabelSet{clientmodel.LabelName(yyS[yypt-2].str): clientmodel.LabelValue(yyS[yypt-0].str)}
}
//line parser.y:112
{ yyVAL.labelSet = clientmodel.LabelSet{ clientmodel.LabelName(yyS[yypt-2].str): clientmodel.LabelValue(yyS[yypt-0].str) } }
case 18:
//line parser.y:114
{
yyVAL.ruleNode = yyS[yypt-1].ruleNode
}
//line parser.y:117
{ yyVAL.ruleNode = yyS[yypt-1].ruleNode }
case 19:
//line parser.y:116
{
yyS[yypt-0].labelSet[clientmodel.MetricNameLabel] = clientmodel.LabelValue(yyS[yypt-1].str)
yyVAL.ruleNode = ast.NewVectorLiteral(yyS[yypt-0].labelSet)
}
//line parser.y:119
{ yyS[yypt-0].labelSet[clientmodel.MetricNameLabel] = clientmodel.LabelValue(yyS[yypt-1].str); yyVAL.ruleNode = ast.NewVectorLiteral(yyS[yypt-0].labelSet) }
case 20:
//line parser.y:118
//line parser.y:121
{
var err error
yyVAL.ruleNode, err = NewFunctionCall(yyS[yypt-3].str, yyS[yypt-1].ruleNodeSlice)
if err != nil {
yylex.Error(err.Error())
return 1
}
}
var err error
yyVAL.ruleNode, err = NewFunctionCall(yyS[yypt-3].str, yyS[yypt-1].ruleNodeSlice)
if err != nil { yylex.Error(err.Error()); return 1 }
}
case 21:
//line parser.y:124
//line parser.y:127
{
var err error
yyVAL.ruleNode, err = NewFunctionCall(yyS[yypt-2].str, []ast.Node{})
if err != nil {
yylex.Error(err.Error())
return 1
}
}
var err error
yyVAL.ruleNode, err = NewFunctionCall(yyS[yypt-2].str, []ast.Node{})
if err != nil { yylex.Error(err.Error()); return 1 }
}
case 22:
//line parser.y:130
//line parser.y:133
{
var err error
yyVAL.ruleNode, err = NewMatrix(yyS[yypt-3].ruleNode, yyS[yypt-1].str)
if err != nil {
yylex.Error(err.Error())
return 1
}
}
var err error
yyVAL.ruleNode, err = NewMatrix(yyS[yypt-3].ruleNode, yyS[yypt-1].str)
if err != nil { yylex.Error(err.Error()); return 1 }
}
case 23:
//line parser.y:136
//line parser.y:139
{
var err error
yyVAL.ruleNode, err = NewVectorAggregation(yyS[yypt-4].str, yyS[yypt-2].ruleNode, yyS[yypt-0].labelNameSlice)
if err != nil {
yylex.Error(err.Error())
return 1
}
}
var err error
yyVAL.ruleNode, err = NewVectorAggregation(yyS[yypt-4].str, yyS[yypt-2].ruleNode, yyS[yypt-0].labelNameSlice)
if err != nil { yylex.Error(err.Error()); return 1 }
}
case 24:
//line parser.y:144
//line parser.y:147
{
var err error
yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode)
if err != nil {
yylex.Error(err.Error())
return 1
}
}
var err error
yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode)
if err != nil { yylex.Error(err.Error()); return 1 }
}
case 25:
//line parser.y:150
//line parser.y:153
{
var err error
yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode)
if err != nil {
yylex.Error(err.Error())
return 1
}
}
var err error
yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode)
if err != nil { yylex.Error(err.Error()); return 1 }
}
case 26:
//line parser.y:156
//line parser.y:159
{
var err error
yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode)
if err != nil {
yylex.Error(err.Error())
return 1
}
}
var err error
yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode)
if err != nil { yylex.Error(err.Error()); return 1 }
}
case 27:
//line parser.y:162
{
yyVAL.ruleNode = ast.NewScalarLiteral(yyS[yypt-0].num)
}
//line parser.y:165
{ yyVAL.ruleNode = ast.NewScalarLiteral(yyS[yypt-0].num) }
case 28:
//line parser.y:166
{
yyVAL.labelNameSlice = clientmodel.LabelNames{}
}
//line parser.y:169
{ yyVAL.labelNameSlice = clientmodel.LabelNames{} }
case 29:
//line parser.y:168
{
yyVAL.labelNameSlice = yyS[yypt-1].labelNameSlice
}
//line parser.y:171
{ yyVAL.labelNameSlice = yyS[yypt-1].labelNameSlice }
case 30:
//line parser.y:172
{
yyVAL.labelNameSlice = clientmodel.LabelNames{clientmodel.LabelName(yyS[yypt-0].str)}
}
//line parser.y:175
{ yyVAL.labelNameSlice = clientmodel.LabelNames{clientmodel.LabelName(yyS[yypt-0].str)} }
case 31:
//line parser.y:174
{
yyVAL.labelNameSlice = append(yyVAL.labelNameSlice, clientmodel.LabelName(yyS[yypt-0].str))
}
//line parser.y:177
{ yyVAL.labelNameSlice = append(yyVAL.labelNameSlice, clientmodel.LabelName(yyS[yypt-0].str)) }
case 32:
//line parser.y:178
{
yyVAL.ruleNodeSlice = []ast.Node{yyS[yypt-0].ruleNode}
}
//line parser.y:181
{ yyVAL.ruleNodeSlice = []ast.Node{yyS[yypt-0].ruleNode} }
case 33:
//line parser.y:180
{
yyVAL.ruleNodeSlice = append(yyVAL.ruleNodeSlice, yyS[yypt-0].ruleNode)
}
//line parser.y:183
{ yyVAL.ruleNodeSlice = append(yyVAL.ruleNodeSlice, yyS[yypt-0].ruleNode) }
case 34:
//line parser.y:184
{
yyVAL.ruleNode = yyS[yypt-0].ruleNode
}
//line parser.y:187
{ yyVAL.ruleNode = yyS[yypt-0].ruleNode }
case 35:
//line parser.y:186
{
yyVAL.ruleNode = ast.NewStringLiteral(yyS[yypt-0].str)
}
//line parser.y:189
{ yyVAL.ruleNode = ast.NewStringLiteral(yyS[yypt-0].str) }
}
goto yystack /* stack new state and value */
}
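A reading aid for the generated actions above: yyS is the parser's
semantic-value stack and yypt the index of its top, so yyS[yypt-n] addresses
the value of the symbol n positions from the end of the production. A
hypothetical helper spelling that out for the arithmetic rules:

// For a production "expr : expr ADDITIVE_OP expr", the three symbols
// sit at yyS[yypt-2], yyS[yypt-1] and yyS[yypt-0], exactly as the
// NewArithExpr actions above index them.
func arithOperands(yyS []yySymType, yypt int) (left ast.Node, op string, right ast.Node) {
	return yyS[yypt-2].ruleNode, yyS[yypt-1].str, yyS[yypt-0].ruleNode
}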


@@ -123,6 +123,17 @@ func TestExpressions(t *testing.T) {
},
fullRanges: 0,
intervalRanges: 8,
}, {
expr: `
// Test comment.
SUM(http_requests) BY /* comments shouldn't
have any effect */ (job) // another comment`,
output: []string{
`http_requests{job="api-server"} => 1000 @[%v]`,
`http_requests{job="app-server"} => 2600 @[%v]`,
},
fullRanges: 0,
intervalRanges: 8,
}, {
expr: `COUNT(http_requests) BY (job)`,
output: []string{
@@ -448,7 +459,7 @@ var ruleTests = []struct {
{
inputFile: "syntax_error.rules",
shouldFail: true,
errContains: "Error parsing rules at line 3",
errContains: "Error parsing rules at line 5",
},
{
inputFile: "non_vector.rules",