prometheus/promql/parser/parse.go
Julien Pivotto a605b81b14
Merge pull request #12170 from fpetkovski/parser-inject-functions
parser: Allow parsing arbitrary functions
2023-06-27 13:32:46 +02:00

840 lines
23 KiB
Go

// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package parser
import (
"errors"
"fmt"
"math"
"os"
"runtime"
"strconv"
"strings"
"sync"
"time"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/timestamp"
"github.com/prometheus/prometheus/util/strutil"
)
var parserPool = sync.Pool{
New: func() interface{} {
return &parser{}
},
}
type Parser interface {
ParseExpr() (Expr, error)
Close()
}
type parser struct {
lex Lexer
inject ItemType
injecting bool
// functions contains all functions supported by the parser instance.
functions map[string]*Function
// Everytime an Item is lexed that could be the end
// of certain expressions its end position is stored here.
lastClosing Pos
yyParser yyParserImpl
generatedParserResult interface{}
parseErrors ParseErrors
}
type Opt func(p *parser)
func WithFunctions(functions map[string]*Function) Opt {
return func(p *parser) {
p.functions = functions
}
}
// NewParser returns a new parser.
// nolint:revive
func NewParser(input string, opts ...Opt) *parser {
p := parserPool.Get().(*parser)
p.functions = Functions
p.injecting = false
p.parseErrors = nil
p.generatedParserResult = nil
// Clear lexer struct before reusing.
p.lex = Lexer{
input: input,
state: lexStatements,
}
// Apply user define options.
for _, opt := range opts {
opt(p)
}
return p
}
func (p *parser) ParseExpr() (expr Expr, err error) {
defer p.recover(&err)
parseResult := p.parseGenerated(START_EXPRESSION)
if parseResult != nil {
expr = parseResult.(Expr)
}
// Only typecheck when there are no syntax errors.
if len(p.parseErrors) == 0 {
p.checkAST(expr)
}
if len(p.parseErrors) != 0 {
err = p.parseErrors
}
return expr, err
}
func (p *parser) Close() {
defer parserPool.Put(p)
}
// ParseErr wraps a parsing error with line and position context.
type ParseErr struct {
PositionRange PositionRange
Err error
Query string
// LineOffset is an additional line offset to be added. Only used inside unit tests.
LineOffset int
}
func (e *ParseErr) Error() string {
pos := int(e.PositionRange.Start)
lastLineBreak := -1
line := e.LineOffset + 1
var positionStr string
if pos < 0 || pos > len(e.Query) {
positionStr = "invalid position:"
} else {
for i, c := range e.Query[:pos] {
if c == '\n' {
lastLineBreak = i
line++
}
}
col := pos - lastLineBreak
positionStr = fmt.Sprintf("%d:%d:", line, col)
}
return fmt.Sprintf("%s parse error: %s", positionStr, e.Err)
}
type ParseErrors []ParseErr
// Since producing multiple error messages might look weird when combined with error wrapping,
// only the first error produced by the parser is included in the error string.
// If getting the full error list is desired, it is recommended to typecast the error returned
// by the parser to ParseErrors and work with the underlying slice.
func (errs ParseErrors) Error() string {
if len(errs) != 0 {
return errs[0].Error()
}
// Should never happen
// Panicking while printing an error seems like a bad idea, so the
// situation is explained in the error message instead.
return "error contains no error message"
}
// ParseExpr returns the expression parsed from the input.
func ParseExpr(input string) (expr Expr, err error) {
p := NewParser(input)
defer p.Close()
return p.ParseExpr()
}
// ParseMetric parses the input into a metric
func ParseMetric(input string) (m labels.Labels, err error) {
p := NewParser(input)
defer p.Close()
defer p.recover(&err)
parseResult := p.parseGenerated(START_METRIC)
if parseResult != nil {
m = parseResult.(labels.Labels)
}
if len(p.parseErrors) != 0 {
err = p.parseErrors
}
return m, err
}
// ParseMetricSelector parses the provided textual metric selector into a list of
// label matchers.
func ParseMetricSelector(input string) (m []*labels.Matcher, err error) {
p := NewParser(input)
defer p.Close()
defer p.recover(&err)
parseResult := p.parseGenerated(START_METRIC_SELECTOR)
if parseResult != nil {
m = parseResult.(*VectorSelector).LabelMatchers
}
if len(p.parseErrors) != 0 {
err = p.parseErrors
}
return m, err
}
// SequenceValue is an omittable value in a sequence of time series values.
type SequenceValue struct {
Value float64
Omitted bool
}
func (v SequenceValue) String() string {
if v.Omitted {
return "_"
}
return fmt.Sprintf("%f", v.Value)
}
type seriesDescription struct {
labels labels.Labels
values []SequenceValue
}
// ParseSeriesDesc parses the description of a time series.
func ParseSeriesDesc(input string) (labels labels.Labels, values []SequenceValue, err error) {
p := NewParser(input)
p.lex.seriesDesc = true
defer p.Close()
defer p.recover(&err)
parseResult := p.parseGenerated(START_SERIES_DESCRIPTION)
if parseResult != nil {
result := parseResult.(*seriesDescription)
labels = result.labels
values = result.values
}
if len(p.parseErrors) != 0 {
err = p.parseErrors
}
return labels, values, err
}
// addParseErrf formats the error and appends it to the list of parsing errors.
func (p *parser) addParseErrf(positionRange PositionRange, format string, args ...interface{}) {
p.addParseErr(positionRange, fmt.Errorf(format, args...))
}
// addParseErr appends the provided error to the list of parsing errors.
func (p *parser) addParseErr(positionRange PositionRange, err error) {
perr := ParseErr{
PositionRange: positionRange,
Err: err,
Query: p.lex.input,
}
p.parseErrors = append(p.parseErrors, perr)
}
// unexpected creates a parser error complaining about an unexpected lexer item.
// The item that is presented as unexpected is always the last item produced
// by the lexer.
func (p *parser) unexpected(context, expected string) {
var errMsg strings.Builder
// Do not report lexer errors twice
if p.yyParser.lval.item.Typ == ERROR {
return
}
errMsg.WriteString("unexpected ")
errMsg.WriteString(p.yyParser.lval.item.desc())
if context != "" {
errMsg.WriteString(" in ")
errMsg.WriteString(context)
}
if expected != "" {
errMsg.WriteString(", expected ")
errMsg.WriteString(expected)
}
p.addParseErr(p.yyParser.lval.item.PositionRange(), errors.New(errMsg.String()))
}
var errUnexpected = errors.New("unexpected error")
// recover is the handler that turns panics into returns from the top level of Parse.
func (p *parser) recover(errp *error) {
e := recover()
switch _, ok := e.(runtime.Error); {
case ok:
// Print the stack trace but do not inhibit the running application.
buf := make([]byte, 64<<10)
buf = buf[:runtime.Stack(buf, false)]
fmt.Fprintf(os.Stderr, "parser panic: %v\n%s", e, buf)
*errp = errUnexpected
case e != nil:
*errp = e.(error)
}
}
// Lex is expected by the yyLexer interface of the yacc generated parser.
// It writes the next Item provided by the lexer to the provided pointer address.
// Comments are skipped.
//
// The yyLexer interface is currently implemented by the parser to allow
// the generated and non-generated parts to work together with regards to lookahead
// and error handling.
//
// For more information, see https://pkg.go.dev/golang.org/x/tools/cmd/goyacc.
func (p *parser) Lex(lval *yySymType) int {
var typ ItemType
if p.injecting {
p.injecting = false
return int(p.inject)
}
// Skip comments.
for {
p.lex.NextItem(&lval.item)
typ = lval.item.Typ
if typ != COMMENT {
break
}
}
switch typ {
case ERROR:
pos := PositionRange{
Start: p.lex.start,
End: Pos(len(p.lex.input)),
}
p.addParseErr(pos, errors.New(p.yyParser.lval.item.Val))
// Tells yacc that this is the end of input.
return 0
case EOF:
lval.item.Typ = EOF
p.InjectItem(0)
case RIGHT_BRACE, RIGHT_PAREN, RIGHT_BRACKET, DURATION, NUMBER:
p.lastClosing = lval.item.Pos + Pos(len(lval.item.Val))
}
return int(typ)
}
// Error is expected by the yyLexer interface of the yacc generated parser.
//
// It is a no-op since the parsers error routines are triggered
// by mechanisms that allow more fine-grained control
// For more information, see https://pkg.go.dev/golang.org/x/tools/cmd/goyacc.
func (p *parser) Error(string) {
}
// InjectItem allows injecting a single Item at the beginning of the token stream
// consumed by the generated parser.
// This allows having multiple start symbols as described in
// https://www.gnu.org/software/bison/manual/html_node/Multiple-start_002dsymbols.html .
// Only the Lex function used by the generated parser is affected by this injected Item.
// Trying to inject when a previously injected Item has not yet been consumed will panic.
// Only Item types that are supposed to be used as start symbols are allowed as an argument.
func (p *parser) InjectItem(typ ItemType) {
if p.injecting {
panic("cannot inject multiple Items into the token stream")
}
if typ != 0 && (typ <= startSymbolsStart || typ >= startSymbolsEnd) {
panic("cannot inject symbol that isn't start symbol")
}
p.inject = typ
p.injecting = true
}
func (p *parser) newBinaryExpression(lhs Node, op Item, modifiers, rhs Node) *BinaryExpr {
ret := modifiers.(*BinaryExpr)
ret.LHS = lhs.(Expr)
ret.RHS = rhs.(Expr)
ret.Op = op.Typ
return ret
}
func (p *parser) assembleVectorSelector(vs *VectorSelector) {
if vs.Name != "" {
nameMatcher, err := labels.NewMatcher(labels.MatchEqual, labels.MetricName, vs.Name)
if err != nil {
panic(err) // Must not happen with labels.MatchEqual
}
vs.LabelMatchers = append(vs.LabelMatchers, nameMatcher)
}
}
func (p *parser) newAggregateExpr(op Item, modifier, args Node) (ret *AggregateExpr) {
ret = modifier.(*AggregateExpr)
arguments := args.(Expressions)
ret.PosRange = PositionRange{
Start: op.Pos,
End: p.lastClosing,
}
ret.Op = op.Typ
if len(arguments) == 0 {
p.addParseErrf(ret.PositionRange(), "no arguments for aggregate expression provided")
// Prevents invalid array accesses.
return
}
desiredArgs := 1
if ret.Op.IsAggregatorWithParam() {
desiredArgs = 2
ret.Param = arguments[0]
}
if len(arguments) != desiredArgs {
p.addParseErrf(ret.PositionRange(), "wrong number of arguments for aggregate expression provided, expected %d, got %d", desiredArgs, len(arguments))
return
}
ret.Expr = arguments[desiredArgs-1]
return ret
}
// number parses a number.
func (p *parser) number(val string) float64 {
n, err := strconv.ParseInt(val, 0, 64)
f := float64(n)
if err != nil {
f, err = strconv.ParseFloat(val, 64)
}
if err != nil {
p.addParseErrf(p.yyParser.lval.item.PositionRange(), "error parsing number: %s", err)
}
return f
}
// expectType checks the type of the node and raises an error if it
// is not of the expected type.
func (p *parser) expectType(node Node, want ValueType, context string) {
t := p.checkAST(node)
if t != want {
p.addParseErrf(node.PositionRange(), "expected type %s in %s, got %s", DocumentedType(want), context, DocumentedType(t))
}
}
// checkAST checks the validity of the provided AST. This includes type checking.
func (p *parser) checkAST(node Node) (typ ValueType) {
// For expressions the type is determined by their Type function.
// Lists do not have a type but are not invalid either.
switch n := node.(type) {
case Expressions:
typ = ValueTypeNone
case Expr:
typ = n.Type()
default:
p.addParseErrf(node.PositionRange(), "unknown node type: %T", node)
}
// Recursively check correct typing for child nodes and raise
// errors in case of bad typing.
switch n := node.(type) {
case *EvalStmt:
ty := p.checkAST(n.Expr)
if ty == ValueTypeNone {
p.addParseErrf(n.Expr.PositionRange(), "evaluation statement must have a valid expression type but got %s", DocumentedType(ty))
}
case Expressions:
for _, e := range n {
ty := p.checkAST(e)
if ty == ValueTypeNone {
p.addParseErrf(e.PositionRange(), "expression must have a valid expression type but got %s", DocumentedType(ty))
}
}
case *AggregateExpr:
if !n.Op.IsAggregator() {
p.addParseErrf(n.PositionRange(), "aggregation operator expected in aggregation expression but got %q", n.Op)
}
p.expectType(n.Expr, ValueTypeVector, "aggregation expression")
if n.Op == TOPK || n.Op == BOTTOMK || n.Op == QUANTILE {
p.expectType(n.Param, ValueTypeScalar, "aggregation parameter")
}
if n.Op == COUNT_VALUES {
p.expectType(n.Param, ValueTypeString, "aggregation parameter")
}
case *BinaryExpr:
lt := p.checkAST(n.LHS)
rt := p.checkAST(n.RHS)
// opRange returns the PositionRange of the operator part of the BinaryExpr.
// This is made a function instead of a variable, so it is lazily evaluated on demand.
opRange := func() (r PositionRange) {
// Remove whitespace at the beginning and end of the range.
for r.Start = n.LHS.PositionRange().End; isSpace(rune(p.lex.input[r.Start])); r.Start++ { // nolint:revive
}
for r.End = n.RHS.PositionRange().Start - 1; isSpace(rune(p.lex.input[r.End])); r.End-- { // nolint:revive
}
return
}
if n.ReturnBool && !n.Op.IsComparisonOperator() {
p.addParseErrf(opRange(), "bool modifier can only be used on comparison operators")
}
if n.Op.IsComparisonOperator() && !n.ReturnBool && n.RHS.Type() == ValueTypeScalar && n.LHS.Type() == ValueTypeScalar {
p.addParseErrf(opRange(), "comparisons between scalars must use BOOL modifier")
}
if n.Op.IsSetOperator() && n.VectorMatching.Card == CardOneToOne {
n.VectorMatching.Card = CardManyToMany
}
for _, l1 := range n.VectorMatching.MatchingLabels {
for _, l2 := range n.VectorMatching.Include {
if l1 == l2 && n.VectorMatching.On {
p.addParseErrf(opRange(), "label %q must not occur in ON and GROUP clause at once", l1)
}
}
}
if !n.Op.IsOperator() {
p.addParseErrf(n.PositionRange(), "binary expression does not support operator %q", n.Op)
}
if lt != ValueTypeScalar && lt != ValueTypeVector {
p.addParseErrf(n.LHS.PositionRange(), "binary expression must contain only scalar and instant vector types")
}
if rt != ValueTypeScalar && rt != ValueTypeVector {
p.addParseErrf(n.RHS.PositionRange(), "binary expression must contain only scalar and instant vector types")
}
switch {
case (lt != ValueTypeVector || rt != ValueTypeVector) && n.VectorMatching != nil:
if len(n.VectorMatching.MatchingLabels) > 0 {
p.addParseErrf(n.PositionRange(), "vector matching only allowed between instant vectors")
}
n.VectorMatching = nil
case n.Op.IsSetOperator(): // Both operands are Vectors.
if n.VectorMatching.Card == CardOneToMany || n.VectorMatching.Card == CardManyToOne {
p.addParseErrf(n.PositionRange(), "no grouping allowed for %q operation", n.Op)
}
if n.VectorMatching.Card != CardManyToMany {
p.addParseErrf(n.PositionRange(), "set operations must always be many-to-many")
}
}
if (lt == ValueTypeScalar || rt == ValueTypeScalar) && n.Op.IsSetOperator() {
p.addParseErrf(n.PositionRange(), "set operator %q not allowed in binary scalar expression", n.Op)
}
case *Call:
nargs := len(n.Func.ArgTypes)
if n.Func.Variadic == 0 {
if nargs != len(n.Args) {
p.addParseErrf(n.PositionRange(), "expected %d argument(s) in call to %q, got %d", nargs, n.Func.Name, len(n.Args))
}
} else {
na := nargs - 1
if na > len(n.Args) {
p.addParseErrf(n.PositionRange(), "expected at least %d argument(s) in call to %q, got %d", na, n.Func.Name, len(n.Args))
} else if nargsmax := na + n.Func.Variadic; n.Func.Variadic > 0 && nargsmax < len(n.Args) {
p.addParseErrf(n.PositionRange(), "expected at most %d argument(s) in call to %q, got %d", nargsmax, n.Func.Name, len(n.Args))
}
}
for i, arg := range n.Args {
if i >= len(n.Func.ArgTypes) {
if n.Func.Variadic == 0 {
// This is not a vararg function so we should not check the
// type of the extra arguments.
break
}
i = len(n.Func.ArgTypes) - 1
}
p.expectType(arg, n.Func.ArgTypes[i], fmt.Sprintf("call to function %q", n.Func.Name))
}
case *ParenExpr:
p.checkAST(n.Expr)
case *UnaryExpr:
if n.Op != ADD && n.Op != SUB {
p.addParseErrf(n.PositionRange(), "only + and - operators allowed for unary expressions")
}
if t := p.checkAST(n.Expr); t != ValueTypeScalar && t != ValueTypeVector {
p.addParseErrf(n.PositionRange(), "unary expression only allowed on expressions of type scalar or instant vector, got %q", DocumentedType(t))
}
case *SubqueryExpr:
ty := p.checkAST(n.Expr)
if ty != ValueTypeVector {
p.addParseErrf(n.PositionRange(), "subquery is only allowed on instant vector, got %s instead", ty)
}
case *MatrixSelector:
p.checkAST(n.VectorSelector)
case *VectorSelector:
if n.Name != "" {
// In this case the last LabelMatcher is checking for the metric name
// set outside the braces. This checks if the name has already been set
// previously.
for _, m := range n.LabelMatchers[0 : len(n.LabelMatchers)-1] {
if m != nil && m.Name == labels.MetricName {
p.addParseErrf(n.PositionRange(), "metric name must not be set twice: %q or %q", n.Name, m.Value)
}
}
// Skip the check for non-empty matchers because an explicit
// metric name is a non-empty matcher.
break
}
// A Vector selector must contain at least one non-empty matcher to prevent
// implicit selection of all metrics (e.g. by a typo).
notEmpty := false
for _, lm := range n.LabelMatchers {
if lm != nil && !lm.Matches("") {
notEmpty = true
break
}
}
if !notEmpty {
p.addParseErrf(n.PositionRange(), "vector selector must contain at least one non-empty matcher")
}
case *NumberLiteral, *StringLiteral:
// Nothing to do for terminals.
default:
p.addParseErrf(n.PositionRange(), "unknown node type: %T", node)
}
return
}
func (p *parser) unquoteString(s string) string {
unquoted, err := strutil.Unquote(s)
if err != nil {
p.addParseErrf(p.yyParser.lval.item.PositionRange(), "error unquoting string %q: %s", s, err)
}
return unquoted
}
func parseDuration(ds string) (time.Duration, error) {
dur, err := model.ParseDuration(ds)
if err != nil {
return 0, err
}
if dur == 0 {
return 0, errors.New("duration must be greater than 0")
}
return time.Duration(dur), nil
}
// parseGenerated invokes the yacc generated parser.
// The generated parser gets the provided startSymbol injected into
// the lexer stream, based on which grammar will be used.
func (p *parser) parseGenerated(startSymbol ItemType) interface{} {
p.InjectItem(startSymbol)
p.yyParser.Parse(p)
return p.generatedParserResult
}
func (p *parser) newLabelMatcher(label, operator, value Item) *labels.Matcher {
op := operator.Typ
val := p.unquoteString(value.Val)
// Map the Item to the respective match type.
var matchType labels.MatchType
switch op {
case EQL:
matchType = labels.MatchEqual
case NEQ:
matchType = labels.MatchNotEqual
case EQL_REGEX:
matchType = labels.MatchRegexp
case NEQ_REGEX:
matchType = labels.MatchNotRegexp
default:
// This should never happen, since the error should have been caught
// by the generated parser.
panic("invalid operator")
}
m, err := labels.NewMatcher(matchType, label.Val, val)
if err != nil {
p.addParseErr(mergeRanges(&label, &value), err)
}
return m
}
// addOffset is used to set the offset in the generated parser.
func (p *parser) addOffset(e Node, offset time.Duration) {
var orgoffsetp *time.Duration
var endPosp *Pos
switch s := e.(type) {
case *VectorSelector:
orgoffsetp = &s.OriginalOffset
endPosp = &s.PosRange.End
case *MatrixSelector:
vs, ok := s.VectorSelector.(*VectorSelector)
if !ok {
p.addParseErrf(e.PositionRange(), "ranges only allowed for vector selectors")
return
}
orgoffsetp = &vs.OriginalOffset
endPosp = &s.EndPos
case *SubqueryExpr:
orgoffsetp = &s.OriginalOffset
endPosp = &s.EndPos
default:
p.addParseErrf(e.PositionRange(), "offset modifier must be preceded by an instant vector selector or range vector selector or a subquery")
return
}
// it is already ensured by parseDuration func that there never will be a zero offset modifier
switch {
case *orgoffsetp != 0:
p.addParseErrf(e.PositionRange(), "offset may not be set multiple times")
case orgoffsetp != nil:
*orgoffsetp = offset
}
*endPosp = p.lastClosing
}
// setTimestamp is used to set the timestamp from the @ modifier in the generated parser.
func (p *parser) setTimestamp(e Node, ts float64) {
if math.IsInf(ts, -1) || math.IsInf(ts, 1) || math.IsNaN(ts) ||
ts >= float64(math.MaxInt64) || ts <= float64(math.MinInt64) {
p.addParseErrf(e.PositionRange(), "timestamp out of bounds for @ modifier: %f", ts)
}
var timestampp **int64
var endPosp *Pos
timestampp, _, endPosp, ok := p.getAtModifierVars(e)
if !ok {
return
}
if timestampp != nil {
*timestampp = new(int64)
**timestampp = timestamp.FromFloatSeconds(ts)
}
*endPosp = p.lastClosing
}
// setAtModifierPreprocessor is used to set the preprocessor for the @ modifier.
func (p *parser) setAtModifierPreprocessor(e Node, op Item) {
_, preprocp, endPosp, ok := p.getAtModifierVars(e)
if !ok {
return
}
if preprocp != nil {
*preprocp = op.Typ
}
*endPosp = p.lastClosing
}
func (p *parser) getAtModifierVars(e Node) (**int64, *ItemType, *Pos, bool) {
var (
timestampp **int64
preprocp *ItemType
endPosp *Pos
)
switch s := e.(type) {
case *VectorSelector:
timestampp = &s.Timestamp
preprocp = &s.StartOrEnd
endPosp = &s.PosRange.End
case *MatrixSelector:
vs, ok := s.VectorSelector.(*VectorSelector)
if !ok {
p.addParseErrf(e.PositionRange(), "ranges only allowed for vector selectors")
return nil, nil, nil, false
}
preprocp = &vs.StartOrEnd
timestampp = &vs.Timestamp
endPosp = &s.EndPos
case *SubqueryExpr:
preprocp = &s.StartOrEnd
timestampp = &s.Timestamp
endPosp = &s.EndPos
default:
p.addParseErrf(e.PositionRange(), "@ modifier must be preceded by an instant vector selector or range vector selector or a subquery")
return nil, nil, nil, false
}
if *timestampp != nil || (*preprocp) == START || (*preprocp) == END {
p.addParseErrf(e.PositionRange(), "@ <timestamp> may not be set multiple times")
return nil, nil, nil, false
}
return timestampp, preprocp, endPosp, true
}
func MustLabelMatcher(mt labels.MatchType, name, val string) *labels.Matcher {
m, err := labels.NewMatcher(mt, name, val)
if err != nil {
panic(err)
}
return m
}
func MustGetFunction(name string) *Function {
f, ok := getFunction(name, Functions)
if !ok {
panic(fmt.Errorf("function %q does not exist", name))
}
return f
}