PromQL: Avoid lexer item copies and allocations (#6584)

* PromQL: Avoid lexer item copies and allocations

Signed-off-by: Tobias Guggenmos <tguggenm@redhat.com>
2025-03-05 20:59:13 -08:00 · 2020-01-09 12:26:58 +01:00 · 2020-01-09 12:26:58 +01:00 · b18b6cb332
parent 097fda1440
commit b18b6cb332
4 changed files with 77 additions and 61 deletions
--- a/promql/lex.go
+++ b/promql/lex.go
@ -223,7 +223,8 @@ type Lexer struct {
 	start       Pos     // Start position of this Item.
 	width       Pos     // Width of last rune read from input.
 	lastPos     Pos     // Position of most recent Item returned by NextItem.
-	Items   []Item  // Slice buffer of scanned Items.
+	itemp       *Item   // Pointer to where the next scanned item should be placed.
 	scannedItem bool    // Set to true every time an item is scanned.
 	parenDepth  int  // Nesting depth of ( ) exprs.
 	braceOpen   bool // Whether a { is opened.
@ -262,8 +263,9 @@ func (l *Lexer) backup() {
 // emit passes an Item back to the client.
 //
 // The Item has type t, position l.start and value l.input[l.start:l.pos].
 // Afterwards l.start is moved up to l.pos, so the next Item begins where this
 // one ended, and l.scannedItem is set to record that an Item was produced.
 func (l *Lexer) emit(t ItemType) {
-	l.Items = append(l.Items, Item{t, l.start, l.input[l.start:l.pos]})
+	*l.itemp = Item{t, l.start, l.input[l.start:l.pos]}
 	l.start = l.pos
 	l.scannedItem = true
 }
 // ignore skips over the pending input before this point.
@ -308,23 +310,26 @@ func (l *Lexer) linePosition() int {
 // errorf returns an error token and terminates the scan by passing
 // back a nil pointer that will be the next state, terminating l.NextItem.
 //
 // The token has type ERROR, position l.start and, as its value, the message
 // produced by fmt.Sprintf(format, args...). l.scannedItem is set, and the
 // returned stateFn is always nil.
 func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
-	l.Items = append(l.Items, Item{ERROR, l.start, fmt.Sprintf(format, args...)})
+	*l.itemp = Item{ERROR, l.start, fmt.Sprintf(format, args...)}
 	l.scannedItem = true
 	return nil
 }
-// NextItem returns the next Item from the input.
+// NextItem writes the next item to the provided address.
-func (l *Lexer) NextItem() Item {
+func (l *Lexer) NextItem(itemp *Item) {
-	for len(l.Items) == 0 {
+	l.scannedItem = false
 	// emit and errorf store the Item they produce through l.itemp.
 	l.itemp = itemp
 	if l.state != nil {
 		// Keep running state functions until one of them sets l.scannedItem.
 		for !l.scannedItem {
 			l.state = l.state(l)
 		}
 	} else {
 		// No state function is left to run: report EOF.
 		l.emit(EOF)
 	}
-	}
+
-	Item := l.Items[0]
+	l.lastPos = l.itemp.Pos
 	// NOTE(review): the change markers in this hunk look incomplete. The three
 	// lines below still refer to l.Items and a returned Item, so they
 	// presumably belong to the removed implementation — confirm against the
 	// upstream commit.
 	l.Items = l.Items[1:]
 	l.lastPos = Item.Pos
 	return Item
 }
 // Lex creates a new scanner for the input string.
@ -336,13 +341,6 @@ func Lex(input string) *Lexer {
 	return l
 }
 // run runs the state machine for the lexer.
 //
 // It starts in lexStatements and keeps replacing l.state with the result of
 // calling the current state function until one of them returns nil.
 func (l *Lexer) run() {
 	for l.state = lexStatements; l.state != nil; {
 		l.state = l.state(l)
 	}
 }
 // lineComment is the character that starts a line comment.
 const lineComment = "#"
@ -442,7 +440,7 @@ func lexStatements(l *Lexer) stateFn {
 	case r == '{':
 		l.emit(LEFT_BRACE)
 		l.braceOpen = true
-		return lexInsideBraces(l)
+		return lexInsideBraces
 	case r == '[':
 		if l.bracketOpen {
 			return l.errorf("unexpected left bracket %q", r)
@ -559,14 +557,14 @@ func lexValueSequence(l *Lexer) stateFn {
 // package of the Go standard library to work for Prometheus-style strings.
 // None of the actual escaping/quoting logic was changed in this function - it
 // was only modified to integrate with our lexer.
-func lexEscape(l *Lexer) {
+func lexEscape(l *Lexer) stateFn {
 	var n int
 	var base, max uint32
 	ch := l.next()
 	switch ch {
 	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', l.stringOpen:
-		return
+		return lexString
 	case '0', '1', '2', '3', '4', '5', '6', '7':
 		n, base, max = 3, 8, 255
 	case 'x':
@ -580,8 +578,10 @@ func lexEscape(l *Lexer) {
 		n, base, max = 8, 16, unicode.MaxRune
 	case eof:
 		l.errorf("escape sequence not terminated")
 		return lexString
 	default:
 		l.errorf("unknown escape sequence %#U", ch)
 		return lexString
 	}
 	var x uint32
@ -590,8 +590,10 @@ func lexEscape(l *Lexer) {
 		if d >= base {
 			if ch == eof {
 				l.errorf("escape sequence not terminated")
 				return lexString
 			}
 			l.errorf("illegal character %#U in escape sequence", ch)
 			return lexString
 		}
 		x = x*base + d
 		ch = l.next()
@ -601,6 +603,7 @@ func lexEscape(l *Lexer) {
 	if x > max || 0xD800 <= x && x < 0xE000 {
 		l.errorf("escape sequence is an invalid Unicode code point")
 	}
 	return lexString
 }
 // digitVal returns the digit value of a rune or 16 in case the rune does not
@ -631,9 +634,10 @@ Loop:
 	for {
 		switch l.next() {
 		case '\\':
-			lexEscape(l)
+			return lexEscape
 		case utf8.RuneError:
-			return l.errorf("invalid UTF-8 rune")
+			l.errorf("invalid UTF-8 rune")
 			return lexString
 		case eof, '\n':
 			return l.errorf("unterminated quoted string")
 		case l.stringOpen:
@ -650,9 +654,11 @@ Loop:
 	for {
 		switch l.next() {
 		case utf8.RuneError:
-			return l.errorf("invalid UTF-8 rune")
+			l.errorf("invalid UTF-8 rune")
 			return lexRawString
 		case eof:
-			return l.errorf("unterminated raw string")
+			l.errorf("unterminated raw string")
 			return lexRawString
 		case l.stringOpen:
 			break Loop
 		}
--- a/promql/lex_test.go
+++ b/promql/lex_test.go
@ -697,13 +697,25 @@ func TestLexer(t *testing.T) {
 					input:      test.input,
 					seriesDesc: test.seriesDesc,
 				}
 				l.run()
-				out := l.Items
+				var out []Item
 				for l.state = lexStatements; l.state != nil; {
 					out = append(out, Item{})
 					l.NextItem(&out[len(out)-1])
 				}
 				lastItem := out[len(out)-1]
 				if test.fail {
-					if lastItem.Typ != ERROR {
+					hasError := false
 					for _, item := range out {
 						if item.Typ == ERROR {
 							hasError = true
 						}
 					}
 					if !hasError {
 						t.Logf("%d: input %q", i, test.input)
 						t.Fatalf("expected lexing error but did not fail")
 					}
--- a/promql/parse.go
+++ b/promql/parse.go
@ -30,11 +30,12 @@ import (
 // parser holds the state shared between the lexer and the generated parser.
 type parser struct {
 	// lex is the lexer the parser pulls Items from via NextItem.
 	lex *Lexer
 	// token is the most recent Item stored by next.
 	token Item
 	// inject is the value set by InjectItem; Lex hands it out while injecting
 	// is true.
-	inject    Item
+	inject    ItemType
 	// injecting is set by InjectItem and cleared by Lex once inject has been
 	// handed out.
 	injecting bool
 	// yyParser is the generated parser implementation.
 	yyParser yyParserImpl
 	// generatedParserResult is the value returned by parseGenerated.
 	generatedParserResult interface{}
 }
@ -129,21 +130,6 @@ func (p *parser) typecheck(node Node) (err error) {
 	return nil
 }
 // next returns the next token.
 //
 // Items of type COMMENT are skipped. The resulting Item is stored in p.token
 // before being returned; if it is of type ERROR, its Val is reported through
 // p.errorf.
 func (p *parser) next() Item {
 	t := p.lex.NextItem()
 	// Skip comments.
 	for t.Typ == COMMENT {
 		t = p.lex.NextItem()
 	}
 	p.token = t
 	if p.token.Typ == ERROR {
 		p.errorf("%s", p.token.Val)
 	}
 	return p.token
 }
 // errorf formats the error and terminates processing.
 func (p *parser) errorf(format string, args ...interface{}) {
 	p.error(errors.Errorf(format, args...))
@ -169,7 +155,7 @@ func (p *parser) unexpected(context string, expected string) {
 	var errMsg strings.Builder
 	errMsg.WriteString("unexpected ")
-	errMsg.WriteString(p.token.desc())
+	errMsg.WriteString(p.yyParser.lval.item.desc())
 	if context != "" {
 		errMsg.WriteString(" in ")
@ -211,16 +197,28 @@ func (p *parser) recover(errp *error) {
 //
 // For more information, see https://godoc.org/golang.org/x/tools/cmd/goyacc.
 func (p *parser) Lex(lval *yySymType) int {
 	var typ ItemType
 	if p.injecting {
 		lval.item = p.inject
 		p.injecting = false
 		return int(p.inject)
 	} else {
-		lval.item = p.next()
+		// Skip comments.
 		for {
 			p.lex.NextItem(&lval.item)
 			typ = lval.item.Typ
 			if typ != COMMENT {
 				break
 			}
 		}
 	}
-	typ := lval.item.Typ
+	if typ == ERROR {
 		p.errorf("%s", lval.item.Val)
 	}
 	if typ == EOF {
 		lval.item.Typ = EOF
 		p.InjectItem(0)
 	}
@ -251,7 +249,7 @@ func (p *parser) InjectItem(typ ItemType) {
 		panic("cannot inject symbol that isn't start symbol")
 	}
-	p.inject = Item{Typ: typ}
+	p.inject = typ
 	p.injecting = true
 }
 func (p *parser) newBinaryExpression(lhs Node, op Item, modifiers Node, rhs Node) *BinaryExpr {
@ -525,7 +523,7 @@ func parseDuration(ds string) (time.Duration, error) {
 func (p *parser) parseGenerated(startSymbol ItemType) interface{} {
 	p.InjectItem(startSymbol)
-	yyParse(p)
+	p.yyParser.Parse(p)
 	return p.generatedParserResult
--- a/promql/parse_test.go
+++ b/promql/parse_test.go
@ -233,7 +233,7 @@ var testExpr = []struct {
 	}, {
 		input:  "(1))",
 		fail:   true,
-		errMsg: "unexpected \")\"",
+		errMsg: "unexpected right parenthesis ')'",
 	}, {
 		input:  "((1)",
 		fail:   true,