From 3c9e3ee55259abc879e59fb1471afe1aa7ec3238 Mon Sep 17 00:00:00 2001 From: Pranshu Srivastava Date: Sun, 10 Sep 2023 04:27:20 +0530 Subject: [PATCH] parser: support underscores Support underscores in numbers, namely, decimals, hexadecimals, and exponentials. Fixes #12769 Signed-off-by: Pranshu Srivastava Signed-off-by: Pranshu Srivastava --- promql/parser/lex.go | 84 ++++++++++++++++--- promql/parser/lex_test.go | 78 +++++++++++++++++ promql/parser/parse_test.go | 4 +- web/ui/module/lezer-promql/src/promql.grammar | 2 +- 4 files changed, 155 insertions(+), 13 deletions(-) diff --git a/promql/parser/lex.go b/promql/parser/lex.go index 4e3de2a668..98ce370b0f 100644 --- a/promql/parser/lex.go +++ b/promql/parser/lex.go @@ -313,6 +313,11 @@ func (l *Lexer) accept(valid string) bool { return false } +// is peeks and returns true if the next rune is contained in the provided string. +func (l *Lexer) is(valid string) bool { + return strings.ContainsRune(valid, l.peek()) +} + // acceptRun consumes a run of runes from the valid set. func (l *Lexer) acceptRun(valid string) { for strings.ContainsRune(valid, l.next()) { @@ -901,19 +906,78 @@ func acceptRemainingDuration(l *Lexer) bool { // scanNumber scans numbers of different formats. The scanned Item is // not necessarily a valid number. This case is caught by the parser. func (l *Lexer) scanNumber() bool { - digits := "0123456789" + // Modify the digit pattern if the number is hexadecimal. + digitPattern := "0123456789" // Disallow hexadecimal in series descriptions as the syntax is ambiguous. 
- if !l.seriesDesc && l.accept("0") && l.accept("xX") { - digits = "0123456789abcdefABCDEF" + if !l.seriesDesc && + l.accept("0") && l.accept("xX") { + l.accept("_") // e.g., 0X_1FFFP-16 == 0.1249847412109375 + digitPattern = "0123456789abcdefABCDEF" } - l.acceptRun(digits) - if l.accept(".") { - l.acceptRun(digits) - } - if l.accept("eE") { - l.accept("+-") - l.acceptRun("0123456789") + const ( + // Define dot, exponent, and underscore patterns. + dotPattern = "." + exponentPattern = "eE" + underscorePattern = "_" + // Anti-patterns are rune sets that cannot follow their respective rune. + dotAntiPattern = "_." + exponentAntiPattern = "._eE" // and EOL. + underscoreAntiPattern = "._eE" // and EOL. + ) + // All numbers follow the prefix: [.][d][d._eE]* + l.accept(dotPattern) + l.accept(digitPattern) + // [d._eE]* hereon. + dotConsumed := false + exponentConsumed := false + for l.is(digitPattern + dotPattern + underscorePattern + exponentPattern) { + // "." cannot repeat. + if l.is(dotPattern) { + if dotConsumed { + l.accept(dotPattern) + return false + } + } + // "eE" cannot repeat. + if l.is(exponentPattern) { + if exponentConsumed { + l.accept(exponentPattern) + return false + } + } + // Handle dots. + if l.accept(dotPattern) { + dotConsumed = true + if l.accept(dotAntiPattern) { + return false + } + // Fractional hexadecimal literals are not allowed. + if len(digitPattern) > 10 /* 0x[\da-fA-F].[\d]+p[\d] */ { + return false + } + continue + } + // Handle exponents. + if l.accept(exponentPattern) { + exponentConsumed = true + l.accept("+-") + if l.accept(exponentAntiPattern) || l.peek() == eof { + return false + } + continue + } + // Handle underscores. + if l.accept(underscorePattern) { + if l.accept(underscoreAntiPattern) || l.peek() == eof { + return false + } + + continue + } + // Handle digits at the end since we already consumed before this loop. 
+ l.acceptRun(digitPattern) } + // Next thing must not be alphanumeric unless it's the times token // for series repetitions. if r := l.peek(); (l.seriesDesc && r == 'x') || !isAlphaNumeric(r) { diff --git a/promql/parser/lex_test.go b/promql/parser/lex_test.go index f48c457c0c..ac9aa27625 100644 --- a/promql/parser/lex_test.go +++ b/promql/parser/lex_test.go @@ -132,6 +132,84 @@ var tests = []struct { }, { input: "0x123", expected: []Item{{NUMBER, 0, "0x123"}}, + }, { + input: "1..2", + fail: true, + }, { + input: "1.2.", + fail: true, + }, { + input: "00_1_23_4.56_7_8", + expected: []Item{{NUMBER, 0, "00_1_23_4.56_7_8"}}, + }, { + input: "00_1_23__4.56_7_8", + fail: true, + }, { + input: "00_1_23_4._56_7_8", + fail: true, + }, { + input: "00_1_23_4_.56_7_8", + fail: true, + }, { + input: "0x1_2_34", + expected: []Item{{NUMBER, 0, "0x1_2_34"}}, + }, { + input: "0x1_2__34", + fail: true, + }, { + input: "0x1_2__34.5_6p1", // "0x1.1p1"-based formats are not supported yet. + fail: true, + }, { + input: "0x1_2__34.5_6", + fail: true, + }, { + input: "0x1_2__34.56", + fail: true, + }, { + input: "1_e2", + fail: true, + }, { + input: "1.e2", + expected: []Item{{NUMBER, 0, "1.e2"}}, + }, { + input: "1e.2", + fail: true, + }, { + input: "1e+.2", + fail: true, + }, { + input: "1ee2", + fail: true, + }, { + input: "1e+e2", + fail: true, + }, { + input: "1e", + fail: true, + }, { + input: "1e+", + fail: true, + }, { + input: "1e1_2_34", + expected: []Item{{NUMBER, 0, "1e1_2_34"}}, + }, { + input: "1e_1_2_34", + fail: true, + }, { + input: "1e1_2__34", + fail: true, + }, { + input: "1e+_1_2_34", + fail: true, + }, { + input: "1e-_1_2_34", + fail: true, + }, { + input: "12_", + fail: true, + }, { + input: "_1_2", + expected: []Item{{IDENTIFIER, 0, "_1_2"}}, }, }, }, diff --git a/promql/parser/parse_test.go b/promql/parser/parse_test.go index c56d845947..a4fe28e5b8 100644 --- a/promql/parser/parse_test.go +++ b/promql/parser/parse_test.go @@ -513,12 +513,12 @@ var testExpr = 
[]struct { { input: "2.5.", fail: true, - errMsg: "unexpected character: '.'", + errMsg: `1:1: parse error: bad number or duration syntax: "2.5."`, }, { input: "100..4", fail: true, - errMsg: `unexpected number ".4"`, + errMsg: `1:1: parse error: bad number or duration syntax: "100.."`, }, { input: "0deadbeef", diff --git a/web/ui/module/lezer-promql/src/promql.grammar b/web/ui/module/lezer-promql/src/promql.grammar index fd4edddf24..89aa23c79a 100644 --- a/web/ui/module/lezer-promql/src/promql.grammar +++ b/web/ui/module/lezer-promql/src/promql.grammar @@ -261,7 +261,7 @@ NumberLiteral { LineComment { "#" ![\n]* } number { - (std.digit+ ("." std.digit*)? | "." std.digit+) (("e" | "E") ("+" | "-")? std.digit+)? | + (std.digit+ (("_")? std.digit)* ("." std.digit+ (("_")? std.digit)*)? | "." std.digit+ (("_")? std.digit)*) (("e" | "E") ("+" | "-")? std.digit+ (("_")? std.digit)*)? | "0x" (std.digit | $[a-fA-F])+ } StringLiteral { // TODO: This is for JS, make this work for PromQL.