PromQL: Avoid lexer item copies and allocations (#6584)

* PromQL: Avoid lexer item copies and allocations

Signed-off-by: Tobias Guggenmos <tguggenm@redhat.com>
This commit is contained in:
Tobias Guggenmos 2020-01-09 12:26:58 +01:00 committed by Brian Brazil
parent 097fda1440
commit b18b6cb332
4 changed files with 77 additions and 61 deletions

View file

@ -223,7 +223,8 @@ type Lexer struct {
start Pos // Start position of this Item. start Pos // Start position of this Item.
width Pos // Width of last rune read from input. width Pos // Width of last rune read from input.
lastPos Pos // Position of most recent Item returned by NextItem. lastPos Pos // Position of most recent Item returned by NextItem.
Items []Item // Slice buffer of scanned Items. itemp *Item // Pointer to where the next scanned item should be placed.
scannedItem bool // Set to true every time an item is scanned.
parenDepth int // Nesting depth of ( ) exprs. parenDepth int // Nesting depth of ( ) exprs.
braceOpen bool // Whether a { is opened. braceOpen bool // Whether a { is opened.
@ -262,8 +263,9 @@ func (l *Lexer) backup() {
// emit passes an Item back to the client. // emit passes an Item back to the client.
func (l *Lexer) emit(t ItemType) { func (l *Lexer) emit(t ItemType) {
l.Items = append(l.Items, Item{t, l.start, l.input[l.start:l.pos]}) *l.itemp = Item{t, l.start, l.input[l.start:l.pos]}
l.start = l.pos l.start = l.pos
l.scannedItem = true
} }
// ignore skips over the pending input before this point. // ignore skips over the pending input before this point.
@ -308,23 +310,26 @@ func (l *Lexer) linePosition() int {
// errorf returns an error token and terminates the scan by passing // errorf returns an error token and terminates the scan by passing
// back a nil pointer that will be the next state, terminating l.NextItem. // back a nil pointer that will be the next state, terminating l.NextItem.
func (l *Lexer) errorf(format string, args ...interface{}) stateFn { func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
l.Items = append(l.Items, Item{ERROR, l.start, fmt.Sprintf(format, args...)}) *l.itemp = Item{ERROR, l.start, fmt.Sprintf(format, args...)}
l.scannedItem = true
return nil return nil
} }
// NextItem returns the next Item from the input. // NextItem writes the next item to the provided address.
func (l *Lexer) NextItem() Item { func (l *Lexer) NextItem(itemp *Item) {
for len(l.Items) == 0 { l.scannedItem = false
l.itemp = itemp
if l.state != nil { if l.state != nil {
for !l.scannedItem {
l.state = l.state(l) l.state = l.state(l)
}
} else { } else {
l.emit(EOF) l.emit(EOF)
} }
}
Item := l.Items[0] l.lastPos = l.itemp.Pos
l.Items = l.Items[1:]
l.lastPos = Item.Pos
return Item
} }
// lex creates a new scanner for the input string. // lex creates a new scanner for the input string.
@ -336,13 +341,6 @@ func Lex(input string) *Lexer {
return l return l
} }
// run runs the state machine for the lexer.
func (l *Lexer) run() {
for l.state = lexStatements; l.state != nil; {
l.state = l.state(l)
}
}
// lineComment is the character that starts a line comment. // lineComment is the character that starts a line comment.
const lineComment = "#" const lineComment = "#"
@ -442,7 +440,7 @@ func lexStatements(l *Lexer) stateFn {
case r == '{': case r == '{':
l.emit(LEFT_BRACE) l.emit(LEFT_BRACE)
l.braceOpen = true l.braceOpen = true
return lexInsideBraces(l) return lexInsideBraces
case r == '[': case r == '[':
if l.bracketOpen { if l.bracketOpen {
return l.errorf("unexpected left bracket %q", r) return l.errorf("unexpected left bracket %q", r)
@ -559,14 +557,14 @@ func lexValueSequence(l *Lexer) stateFn {
// package of the Go standard library to work for Prometheus-style strings. // package of the Go standard library to work for Prometheus-style strings.
// None of the actual escaping/quoting logic was changed in this function - it // None of the actual escaping/quoting logic was changed in this function - it
// was only modified to integrate with our lexer. // was only modified to integrate with our lexer.
func lexEscape(l *Lexer) { func lexEscape(l *Lexer) stateFn {
var n int var n int
var base, max uint32 var base, max uint32
ch := l.next() ch := l.next()
switch ch { switch ch {
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', l.stringOpen: case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', l.stringOpen:
return return lexString
case '0', '1', '2', '3', '4', '5', '6', '7': case '0', '1', '2', '3', '4', '5', '6', '7':
n, base, max = 3, 8, 255 n, base, max = 3, 8, 255
case 'x': case 'x':
@ -580,8 +578,10 @@ func lexEscape(l *Lexer) {
n, base, max = 8, 16, unicode.MaxRune n, base, max = 8, 16, unicode.MaxRune
case eof: case eof:
l.errorf("escape sequence not terminated") l.errorf("escape sequence not terminated")
return lexString
default: default:
l.errorf("unknown escape sequence %#U", ch) l.errorf("unknown escape sequence %#U", ch)
return lexString
} }
var x uint32 var x uint32
@ -590,8 +590,10 @@ func lexEscape(l *Lexer) {
if d >= base { if d >= base {
if ch == eof { if ch == eof {
l.errorf("escape sequence not terminated") l.errorf("escape sequence not terminated")
return lexString
} }
l.errorf("illegal character %#U in escape sequence", ch) l.errorf("illegal character %#U in escape sequence", ch)
return lexString
} }
x = x*base + d x = x*base + d
ch = l.next() ch = l.next()
@ -601,6 +603,7 @@ func lexEscape(l *Lexer) {
if x > max || 0xD800 <= x && x < 0xE000 { if x > max || 0xD800 <= x && x < 0xE000 {
l.errorf("escape sequence is an invalid Unicode code point") l.errorf("escape sequence is an invalid Unicode code point")
} }
return lexString
} }
// digitVal returns the digit value of a rune or 16 in case the rune does not // digitVal returns the digit value of a rune or 16 in case the rune does not
@ -631,9 +634,10 @@ Loop:
for { for {
switch l.next() { switch l.next() {
case '\\': case '\\':
lexEscape(l) return lexEscape
case utf8.RuneError: case utf8.RuneError:
return l.errorf("invalid UTF-8 rune") l.errorf("invalid UTF-8 rune")
return lexString
case eof, '\n': case eof, '\n':
return l.errorf("unterminated quoted string") return l.errorf("unterminated quoted string")
case l.stringOpen: case l.stringOpen:
@ -650,9 +654,11 @@ Loop:
for { for {
switch l.next() { switch l.next() {
case utf8.RuneError: case utf8.RuneError:
return l.errorf("invalid UTF-8 rune") l.errorf("invalid UTF-8 rune")
return lexRawString
case eof: case eof:
return l.errorf("unterminated raw string") l.errorf("unterminated raw string")
return lexRawString
case l.stringOpen: case l.stringOpen:
break Loop break Loop
} }

View file

@ -697,13 +697,25 @@ func TestLexer(t *testing.T) {
input: test.input, input: test.input,
seriesDesc: test.seriesDesc, seriesDesc: test.seriesDesc,
} }
l.run()
out := l.Items var out []Item
for l.state = lexStatements; l.state != nil; {
out = append(out, Item{})
l.NextItem(&out[len(out)-1])
}
lastItem := out[len(out)-1] lastItem := out[len(out)-1]
if test.fail { if test.fail {
if lastItem.Typ != ERROR { hasError := false
for _, item := range out {
if item.Typ == ERROR {
hasError = true
}
}
if !hasError {
t.Logf("%d: input %q", i, test.input) t.Logf("%d: input %q", i, test.input)
t.Fatalf("expected lexing error but did not fail") t.Fatalf("expected lexing error but did not fail")
} }

View file

@ -30,11 +30,12 @@ import (
type parser struct { type parser struct {
lex *Lexer lex *Lexer
token Item
inject Item inject ItemType
injecting bool injecting bool
yyParser yyParserImpl
generatedParserResult interface{} generatedParserResult interface{}
} }
@ -129,21 +130,6 @@ func (p *parser) typecheck(node Node) (err error) {
return nil return nil
} }
// next returns the next token.
func (p *parser) next() Item {
t := p.lex.NextItem()
// Skip comments.
for t.Typ == COMMENT {
t = p.lex.NextItem()
}
p.token = t
if p.token.Typ == ERROR {
p.errorf("%s", p.token.Val)
}
return p.token
}
// errorf formats the error and terminates processing. // errorf formats the error and terminates processing.
func (p *parser) errorf(format string, args ...interface{}) { func (p *parser) errorf(format string, args ...interface{}) {
p.error(errors.Errorf(format, args...)) p.error(errors.Errorf(format, args...))
@ -169,7 +155,7 @@ func (p *parser) unexpected(context string, expected string) {
var errMsg strings.Builder var errMsg strings.Builder
errMsg.WriteString("unexpected ") errMsg.WriteString("unexpected ")
errMsg.WriteString(p.token.desc()) errMsg.WriteString(p.yyParser.lval.item.desc())
if context != "" { if context != "" {
errMsg.WriteString(" in ") errMsg.WriteString(" in ")
@ -211,16 +197,28 @@ func (p *parser) recover(errp *error) {
// //
// For more information, see https://godoc.org/golang.org/x/tools/cmd/goyacc. // For more information, see https://godoc.org/golang.org/x/tools/cmd/goyacc.
func (p *parser) Lex(lval *yySymType) int { func (p *parser) Lex(lval *yySymType) int {
var typ ItemType
if p.injecting { if p.injecting {
lval.item = p.inject
p.injecting = false p.injecting = false
return int(p.inject)
} else { } else {
lval.item = p.next() // Skip comments.
for {
p.lex.NextItem(&lval.item)
typ = lval.item.Typ
if typ != COMMENT {
break
}
}
} }
typ := lval.item.Typ if typ == ERROR {
p.errorf("%s", lval.item.Val)
}
if typ == EOF { if typ == EOF {
lval.item.Typ = EOF
p.InjectItem(0) p.InjectItem(0)
} }
@ -251,7 +249,7 @@ func (p *parser) InjectItem(typ ItemType) {
panic("cannot inject symbol that isn't start symbol") panic("cannot inject symbol that isn't start symbol")
} }
p.inject = Item{Typ: typ} p.inject = typ
p.injecting = true p.injecting = true
} }
func (p *parser) newBinaryExpression(lhs Node, op Item, modifiers Node, rhs Node) *BinaryExpr { func (p *parser) newBinaryExpression(lhs Node, op Item, modifiers Node, rhs Node) *BinaryExpr {
@ -525,7 +523,7 @@ func parseDuration(ds string) (time.Duration, error) {
func (p *parser) parseGenerated(startSymbol ItemType) interface{} { func (p *parser) parseGenerated(startSymbol ItemType) interface{} {
p.InjectItem(startSymbol) p.InjectItem(startSymbol)
yyParse(p) p.yyParser.Parse(p)
return p.generatedParserResult return p.generatedParserResult

View file

@ -233,7 +233,7 @@ var testExpr = []struct {
}, { }, {
input: "(1))", input: "(1))",
fail: true, fail: true,
errMsg: "unexpected \")\"", errMsg: "unexpected right parenthesis ')'",
}, { }, {
input: "((1)", input: "((1)",
fail: true, fail: true,