From 7e86cd1ef4ecb45c6e5e343046f0a67c8de50dc6 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Wed, 30 Sep 2015 21:27:21 +0200 Subject: [PATCH] Add Unquote() strutil function. --- util/strutil/quote.go | 221 +++++++++++++++++++++++++++++++++++++ util/strutil/quote_test.go | 125 +++++++++++++++++++++ 2 files changed, 346 insertions(+) create mode 100644 util/strutil/quote.go create mode 100644 util/strutil/quote_test.go diff --git a/util/strutil/quote.go b/util/strutil/quote.go new file mode 100644 index 0000000000..81be1c5b67 --- /dev/null +++ b/util/strutil/quote.go @@ -0,0 +1,221 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package strutil + +import ( + "errors" + "unicode/utf8" +) + +// ErrSyntax indicates that a value does not have the right syntax for the target type. +var ErrSyntax = errors.New("invalid syntax") + +// Unquote interprets s as a single-quoted, double-quoted, or backquoted +// Prometheus query language string literal, returning the string value that s +// quotes. +// +// NOTE: This function as well as the necessary helper functions below +// (unquoteChar, contains, unhex) and associated tests have been adapted from +// the corresponding functions in the "strconv" package of the Go standard +// library to work for Prometheus-style strings. +func Unquote(s string) (t string, err error) { + n := len(s) + if n < 2 { + return "", ErrSyntax + } + quote := s[0] + if quote != s[n-1] { + return "", ErrSyntax + } + s = s[1 : n-1] + + if quote == '`' { + if contains(s, '`') { + return "", ErrSyntax + } + return s, nil + } + if quote != '"' && quote != '\'' { + return "", ErrSyntax + } + if contains(s, '\n') { + return "", ErrSyntax + } + + // Is it trivial? Avoid allocation. + if !contains(s, '\\') && !contains(s, quote) { + return s, nil + } + + var runeTmp [utf8.UTFMax]byte + buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. + for len(s) > 0 { + c, multibyte, ss, err := unquoteChar(s, quote) + if err != nil { + return "", err + } + s = ss + if c < utf8.RuneSelf || !multibyte { + buf = append(buf, byte(c)) + } else { + n := utf8.EncodeRune(runeTmp[:], c) + buf = append(buf, runeTmp[:n]...) + } + } + return string(buf), nil +} + +// unquoteChar decodes the first character or byte in the escaped string +// or character literal represented by the string s. +// It returns four values: +// +// 1) value, the decoded Unicode code point or byte value; +// 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation; +// 3) tail, the remainder of the string after the character; and +// 4) an error that will be nil if the character is syntactically valid. +// +// The second argument, quote, specifies the type of literal being parsed +// and therefore which escaped quote character is permitted. +// If set to a single quote, it permits the sequence \' and disallows unescaped '. +// If set to a double quote, it permits \" and disallows unescaped ". +// If set to zero, it does not permit either escape and allows both quote characters to appear unescaped. +func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { + // easy cases + switch c := s[0]; { + case c == quote && (quote == '\'' || quote == '"'): + err = ErrSyntax + return + case c >= utf8.RuneSelf: + r, size := utf8.DecodeRuneInString(s) + return r, true, s[size:], nil + case c != '\\': + return rune(s[0]), false, s[1:], nil + } + + // hard case: c is backslash + if len(s) <= 1 { + err = ErrSyntax + return + } + c := s[1] + s = s[2:] + + switch c { + case 'a': + value = '\a' + case 'b': + value = '\b' + case 'f': + value = '\f' + case 'n': + value = '\n' + case 'r': + value = '\r' + case 't': + value = '\t' + case 'v': + value = '\v' + case 'x', 'u', 'U': + n := 0 + switch c { + case 'x': + n = 2 + case 'u': + n = 4 + case 'U': + n = 8 + } + var v rune + if len(s) < n { + err = ErrSyntax + return + } + for j := 0; j < n; j++ { + x, ok := unhex(s[j]) + if !ok { + err = ErrSyntax + return + } + v = v<<4 | x + } + s = s[n:] + if c == 'x' { + // single-byte string, possibly not UTF-8 + value = v + break + } + if v > utf8.MaxRune { + err = ErrSyntax + return + } + value = v + multibyte = true + case '0', '1', '2', '3', '4', '5', '6', '7': + v := rune(c) - '0' + if len(s) < 2 { + err = ErrSyntax + return + } + for j := 0; j < 2; j++ { // one digit already; two more + x := rune(s[j]) - '0' + if x < 0 || x > 7 { + err = ErrSyntax + return + } + v = (v << 3) | x + } + s = s[2:] + if v > 255 { + err = ErrSyntax + return + } + value = v + case '\\': + value = '\\' + case '\'', '"': + if c != quote { + err = ErrSyntax + return + } + value = rune(c) + default: + err = ErrSyntax + return + } + tail = s + return +} + +// contains reports whether the string contains the byte c. +func contains(s string, c byte) bool { + for i := 0; i < len(s); i++ { + if s[i] == c { + return true + } + } + return false +} + +func unhex(b byte) (v rune, ok bool) { + c := rune(b) + switch { + case '0' <= c && c <= '9': + return c - '0', true + case 'a' <= c && c <= 'f': + return c - 'a' + 10, true + case 'A' <= c && c <= 'F': + return c - 'A' + 10, true + } + return +} diff --git a/util/strutil/quote_test.go b/util/strutil/quote_test.go new file mode 100644 index 0000000000..35bd6842c8 --- /dev/null +++ b/util/strutil/quote_test.go @@ -0,0 +1,125 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package strutil + +import ( + "testing" +) + +type quoteTest struct { + in string + out string + ascii string +} + +var quotetests = []quoteTest{ + {"\a\b\f\r\n\t\v", `"\a\b\f\r\n\t\v"`, `"\a\b\f\r\n\t\v"`}, + {"\\", `"\\"`, `"\\"`}, + {"abc\xffdef", `"abc\xffdef"`, `"abc\xffdef"`}, + {"\u263a", `"☺"`, `"\u263a"`}, + {"\U0010ffff", `"\U0010ffff"`, `"\U0010ffff"`}, + {"\x04", `"\x04"`, `"\x04"`}, +} + +type unQuoteTest struct { + in string + out string +} + +var unquotetests = []unQuoteTest{ + {`""`, ""}, + {`"a"`, "a"}, + {`"abc"`, "abc"}, + {`"☺"`, "☺"}, + {`"hello world"`, "hello world"}, + {`"\xFF"`, "\xFF"}, + {`"\377"`, "\377"}, + {`"\u1234"`, "\u1234"}, + {`"\U00010111"`, "\U00010111"}, + {`"\U0001011111"`, "\U0001011111"}, + {`"\a\b\f\n\r\t\v\\\""`, "\a\b\f\n\r\t\v\\\""}, + {`"'"`, "'"}, + + {`''`, ""}, + {`'a'`, "a"}, + {`'abc'`, "abc"}, + {`'☺'`, "☺"}, + {`'hello world'`, "hello world"}, + {`'\xFF'`, "\xFF"}, + {`'\377'`, "\377"}, + {`'\u1234'`, "\u1234"}, + {`'\U00010111'`, "\U00010111"}, + {`'\U0001011111'`, "\U0001011111"}, + {`'\a\b\f\n\r\t\v\\\''`, "\a\b\f\n\r\t\v\\'"}, + {`'"'`, "\""}, + + {"``", ``}, + {"`a`", `a`}, + {"`abc`", `abc`}, + {"`☺`", `☺`}, + {"`hello world`", `hello world`}, + {"`\\xFF`", `\xFF`}, + {"`\\377`", `\377`}, + {"`\\`", `\`}, + {"`\n`", "\n"}, + {"` `", ` `}, +} + +var misquoted = []string{ + ``, + `"`, + `"a`, + `"'`, + `b"`, + `"\"`, + `"\9"`, + `"\19"`, + `"\129"`, + `'\'`, + `'\9'`, + `'\19'`, + `'\129'`, + `"\x1!"`, + `"\U12345678"`, + `"\z"`, + "`", + "`xxx", + "`\"", + `"\'"`, + `'\"'`, + "\"\n\"", + "\"\\n\n\"", + "'\n'", +} + +func TestUnquote(t *testing.T) { + for _, tt := range unquotetests { + if out, err := Unquote(tt.in); err != nil && out != tt.out { + t.Errorf("Unquote(%#q) = %q, %v want %q, nil", tt.in, out, err, tt.out) + } + } + + // run the quote tests too, backward + for _, tt := range quotetests { + if in, err := Unquote(tt.out); in != tt.in { + t.Errorf("Unquote(%#q) = %q, %v, want %q, nil", tt.out, in, err, tt.in) + } + } + + for _, s := range misquoted { + if out, err := Unquote(s); out != "" || err != ErrSyntax { + t.Errorf("Unquote(%#q) = %q, %v want %q, %v", s, out, err, "", ErrSyntax) + } + } +}