model/textparse: parsers take a labels SymbolTable

This allows label strings to be interned in the supplied SymbolTable, saving memory.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
This commit is contained in:
Bryan Boreham 2023-03-25 13:31:24 +00:00
parent 123109e967
commit eff3a13e19
10 changed files with 42 additions and 29 deletions

View file

@ -127,7 +127,8 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
ctx := context.Background()
app := w.Appender(ctx)
p := textparse.NewOpenMetricsParser(input)
symbolTable := labels.NewSymbolTable() // One table per block means it won't grow too large.
p := textparse.NewOpenMetricsParser(input, symbolTable)
samplesCount := 0
for {
e, err := p.Next()
@ -216,7 +217,7 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
}
func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) (err error) {
p := textparse.NewOpenMetricsParser(input)
p := textparse.NewOpenMetricsParser(input, nil) // Don't need a SymbolTable to get max and min timestamps.
maxt, mint, err := getMinAndMaxTimestamps(p)
if err != nil {
return fmt.Errorf("getting min and max timestamp: %w", err)

View file

@ -80,22 +80,22 @@ type Parser interface {
//
// This function always returns a valid parser, but might additionally
// return an error if the content type cannot be parsed.
func New(b []byte, contentType string, parseClassicHistograms bool) (Parser, error) {
func New(b []byte, contentType string, parseClassicHistograms bool, st *labels.SymbolTable) (Parser, error) {
if contentType == "" {
return NewPromParser(b), nil
return NewPromParser(b, st), nil
}
mediaType, _, err := mime.ParseMediaType(contentType)
if err != nil {
return NewPromParser(b), err
return NewPromParser(b, st), err
}
switch mediaType {
case "application/openmetrics-text":
return NewOpenMetricsParser(b), nil
return NewOpenMetricsParser(b, st), nil
case "application/vnd.google.protobuf":
return NewProtobufParser(b, parseClassicHistograms), nil
return NewProtobufParser(b, parseClassicHistograms, st), nil
default:
return NewPromParser(b), nil
return NewPromParser(b, st), nil
}
}

View file

@ -17,6 +17,8 @@ import (
"testing"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/labels"
)
func TestNewParser(t *testing.T) {
@ -91,7 +93,7 @@ func TestNewParser(t *testing.T) {
tt := tt // Copy to local variable before going parallel.
t.Parallel()
p, err := New([]byte{}, tt.contentType, false)
p, err := New([]byte{}, tt.contentType, false, labels.NewSymbolTable())
tt.validateParser(t, p)
if tt.err == "" {
require.NoError(t, err)

View file

@ -97,8 +97,11 @@ type OpenMetricsParser struct {
}
// NewOpenMetricsParser returns a new parser of the byte slice.
func NewOpenMetricsParser(b []byte) Parser {
return &OpenMetricsParser{l: &openMetricsLexer{b: b}}
func NewOpenMetricsParser(b []byte, st *labels.SymbolTable) Parser {
return &OpenMetricsParser{
l: &openMetricsLexer{b: b},
builder: labels.NewScratchBuilderWithSymbolTable(st, 16),
}
}
// Series returns the bytes of the series, the timestamp if set, and the value

View file

@ -247,7 +247,7 @@ foo_total 17.0 1520879607.789 # {id="counter-test"} 5`
},
}
p := NewOpenMetricsParser([]byte(input))
p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable())
i := 0
var res labels.Labels
@ -378,7 +378,7 @@ choices}`, "strange©™\n'quoted' \"name\"", "6"),
},
}
p := NewOpenMetricsParser([]byte(input))
p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable())
i := 0
var res labels.Labels
@ -727,7 +727,7 @@ func TestOpenMetricsParseErrors(t *testing.T) {
}
for i, c := range cases {
p := NewOpenMetricsParser([]byte(c.input))
p := NewOpenMetricsParser([]byte(c.input), labels.NewSymbolTable())
var err error
for err == nil {
_, err = p.Next()
@ -792,7 +792,7 @@ func TestOMNullByteHandling(t *testing.T) {
}
for i, c := range cases {
p := NewOpenMetricsParser([]byte(c.input))
p := NewOpenMetricsParser([]byte(c.input), labels.NewSymbolTable())
var err error
for err == nil {
_, err = p.Next()

View file

@ -166,8 +166,11 @@ type PromParser struct {
}
// NewPromParser returns a new parser of the byte slice.
func NewPromParser(b []byte) Parser {
return &PromParser{l: &promlexer{b: append(b, '\n')}}
func NewPromParser(b []byte, st *labels.SymbolTable) Parser {
return &PromParser{
l: &promlexer{b: append(b, '\n')},
builder: labels.NewScratchBuilderWithSymbolTable(st, 16),
}
}
// Series returns the bytes of the series, the timestamp if set, and the value

View file

@ -178,7 +178,7 @@ testmetric{label="\"bar\""} 1`
},
}
p := NewPromParser([]byte(input))
p := NewPromParser([]byte(input), labels.NewSymbolTable())
i := 0
var res labels.Labels
@ -304,7 +304,7 @@ choices}`, "strange©™\n'quoted' \"name\"", "6"),
},
}
p := NewPromParser([]byte(input))
p := NewPromParser([]byte(input), labels.NewSymbolTable())
i := 0
var res labels.Labels
@ -422,7 +422,7 @@ func TestPromParseErrors(t *testing.T) {
}
for i, c := range cases {
p := NewPromParser([]byte(c.input))
p := NewPromParser([]byte(c.input), labels.NewSymbolTable())
var err error
for err == nil {
_, err = p.Next()
@ -476,7 +476,7 @@ func TestPromNullByteHandling(t *testing.T) {
}
for i, c := range cases {
p := NewPromParser([]byte(c.input))
p := NewPromParser([]byte(c.input), labels.NewSymbolTable())
var err error
for err == nil {
_, err = p.Next()
@ -497,7 +497,7 @@ const (
)
func BenchmarkParse(b *testing.B) {
for parserName, parser := range map[string]func([]byte) Parser{
for parserName, parser := range map[string]func([]byte, *labels.SymbolTable) Parser{
"prometheus": NewPromParser,
"openmetrics": NewOpenMetricsParser,
} {
@ -516,8 +516,9 @@ func BenchmarkParse(b *testing.B) {
b.ReportAllocs()
b.ResetTimer()
st := labels.NewSymbolTable()
for i := 0; i < b.N; i += promtestdataSampleCount {
p := parser(buf)
p := parser(buf, st)
Outer:
for i < b.N {
@ -544,8 +545,9 @@ func BenchmarkParse(b *testing.B) {
b.ReportAllocs()
b.ResetTimer()
st := labels.NewSymbolTable()
for i := 0; i < b.N; i += promtestdataSampleCount {
p := parser(buf)
p := parser(buf, st)
Outer:
for i < b.N {
@ -577,8 +579,9 @@ func BenchmarkParse(b *testing.B) {
b.ReportAllocs()
b.ResetTimer()
st := labels.NewSymbolTable()
for i := 0; i < b.N; i += promtestdataSampleCount {
p := parser(buf)
p := parser(buf, st)
Outer:
for i < b.N {

View file

@ -80,13 +80,14 @@ type ProtobufParser struct {
}
// NewProtobufParser returns a parser for the payload in the byte slice.
func NewProtobufParser(b []byte, parseClassicHistograms bool) Parser {
func NewProtobufParser(b []byte, parseClassicHistograms bool, st *labels.SymbolTable) Parser {
return &ProtobufParser{
in: b,
state: EntryInvalid,
mf: &dto.MetricFamily{},
metricBytes: &bytes.Buffer{},
parseClassicHistograms: parseClassicHistograms,
builder: labels.NewScratchBuilderWithSymbolTable(st, 16),
}
}

View file

@ -743,7 +743,7 @@ func TestProtobufParse(t *testing.T) {
}{
{
name: "ignore classic buckets of native histograms",
parser: NewProtobufParser(inputBuf.Bytes(), false),
parser: NewProtobufParser(inputBuf.Bytes(), false, labels.NewSymbolTable()),
expected: []parseResult{
{
m: "go_build_info",
@ -1280,7 +1280,7 @@ func TestProtobufParse(t *testing.T) {
},
{
name: "parse classic and native buckets",
parser: NewProtobufParser(inputBuf.Bytes(), true),
parser: NewProtobufParser(inputBuf.Bytes(), true, labels.NewSymbolTable()),
expected: []parseResult{
{ // 0
m: "go_build_info",

View file

@ -391,7 +391,7 @@ func TestFederationWithNativeHistograms(t *testing.T) {
body, err := io.ReadAll(res.Body)
require.NoError(t, err)
p := textparse.NewProtobufParser(body, false)
p := textparse.NewProtobufParser(body, false, labels.NewSymbolTable())
var actVec promql.Vector
metricFamilies := 0
l := labels.Labels{}