model/textparse: parsers take a labels SymbolTable

This allows strings to be interned to save memory.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
This commit is contained in:
Bryan Boreham 2023-03-25 13:31:24 +00:00
parent 123109e967
commit eff3a13e19
10 changed files with 42 additions and 29 deletions

View file

@ -127,7 +127,8 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
ctx := context.Background() ctx := context.Background()
app := w.Appender(ctx) app := w.Appender(ctx)
p := textparse.NewOpenMetricsParser(input) symbolTable := labels.NewSymbolTable() // One table per block means it won't grow too large.
p := textparse.NewOpenMetricsParser(input, symbolTable)
samplesCount := 0 samplesCount := 0
for { for {
e, err := p.Next() e, err := p.Next()
@ -216,7 +217,7 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
} }
func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) (err error) { func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) (err error) {
p := textparse.NewOpenMetricsParser(input) p := textparse.NewOpenMetricsParser(input, nil) // Don't need a SymbolTable to get max and min timestamps.
maxt, mint, err := getMinAndMaxTimestamps(p) maxt, mint, err := getMinAndMaxTimestamps(p)
if err != nil { if err != nil {
return fmt.Errorf("getting min and max timestamp: %w", err) return fmt.Errorf("getting min and max timestamp: %w", err)

View file

@ -80,22 +80,22 @@ type Parser interface {
// //
// This function always returns a valid parser, but might additionally // This function always returns a valid parser, but might additionally
// return an error if the content type cannot be parsed. // return an error if the content type cannot be parsed.
func New(b []byte, contentType string, parseClassicHistograms bool) (Parser, error) { func New(b []byte, contentType string, parseClassicHistograms bool, st *labels.SymbolTable) (Parser, error) {
if contentType == "" { if contentType == "" {
return NewPromParser(b), nil return NewPromParser(b, st), nil
} }
mediaType, _, err := mime.ParseMediaType(contentType) mediaType, _, err := mime.ParseMediaType(contentType)
if err != nil { if err != nil {
return NewPromParser(b), err return NewPromParser(b, st), err
} }
switch mediaType { switch mediaType {
case "application/openmetrics-text": case "application/openmetrics-text":
return NewOpenMetricsParser(b), nil return NewOpenMetricsParser(b, st), nil
case "application/vnd.google.protobuf": case "application/vnd.google.protobuf":
return NewProtobufParser(b, parseClassicHistograms), nil return NewProtobufParser(b, parseClassicHistograms, st), nil
default: default:
return NewPromParser(b), nil return NewPromParser(b, st), nil
} }
} }

View file

@ -17,6 +17,8 @@ import (
"testing" "testing"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/labels"
) )
func TestNewParser(t *testing.T) { func TestNewParser(t *testing.T) {
@ -91,7 +93,7 @@ func TestNewParser(t *testing.T) {
tt := tt // Copy to local variable before going parallel. tt := tt // Copy to local variable before going parallel.
t.Parallel() t.Parallel()
p, err := New([]byte{}, tt.contentType, false) p, err := New([]byte{}, tt.contentType, false, labels.NewSymbolTable())
tt.validateParser(t, p) tt.validateParser(t, p)
if tt.err == "" { if tt.err == "" {
require.NoError(t, err) require.NoError(t, err)

View file

@ -97,8 +97,11 @@ type OpenMetricsParser struct {
} }
// NewOpenMetricsParser returns a new parser of the byte slice. // NewOpenMetricsParser returns a new parser of the byte slice.
func NewOpenMetricsParser(b []byte) Parser { func NewOpenMetricsParser(b []byte, st *labels.SymbolTable) Parser {
return &OpenMetricsParser{l: &openMetricsLexer{b: b}} return &OpenMetricsParser{
l: &openMetricsLexer{b: b},
builder: labels.NewScratchBuilderWithSymbolTable(st, 16),
}
} }
// Series returns the bytes of the series, the timestamp if set, and the value // Series returns the bytes of the series, the timestamp if set, and the value

View file

@ -247,7 +247,7 @@ foo_total 17.0 1520879607.789 # {id="counter-test"} 5`
}, },
} }
p := NewOpenMetricsParser([]byte(input)) p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable())
i := 0 i := 0
var res labels.Labels var res labels.Labels
@ -378,7 +378,7 @@ choices}`, "strange©™\n'quoted' \"name\"", "6"),
}, },
} }
p := NewOpenMetricsParser([]byte(input)) p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable())
i := 0 i := 0
var res labels.Labels var res labels.Labels
@ -727,7 +727,7 @@ func TestOpenMetricsParseErrors(t *testing.T) {
} }
for i, c := range cases { for i, c := range cases {
p := NewOpenMetricsParser([]byte(c.input)) p := NewOpenMetricsParser([]byte(c.input), labels.NewSymbolTable())
var err error var err error
for err == nil { for err == nil {
_, err = p.Next() _, err = p.Next()
@ -792,7 +792,7 @@ func TestOMNullByteHandling(t *testing.T) {
} }
for i, c := range cases { for i, c := range cases {
p := NewOpenMetricsParser([]byte(c.input)) p := NewOpenMetricsParser([]byte(c.input), labels.NewSymbolTable())
var err error var err error
for err == nil { for err == nil {
_, err = p.Next() _, err = p.Next()

View file

@ -166,8 +166,11 @@ type PromParser struct {
} }
// NewPromParser returns a new parser of the byte slice. // NewPromParser returns a new parser of the byte slice.
func NewPromParser(b []byte) Parser { func NewPromParser(b []byte, st *labels.SymbolTable) Parser {
return &PromParser{l: &promlexer{b: append(b, '\n')}} return &PromParser{
l: &promlexer{b: append(b, '\n')},
builder: labels.NewScratchBuilderWithSymbolTable(st, 16),
}
} }
// Series returns the bytes of the series, the timestamp if set, and the value // Series returns the bytes of the series, the timestamp if set, and the value

View file

@ -178,7 +178,7 @@ testmetric{label="\"bar\""} 1`
}, },
} }
p := NewPromParser([]byte(input)) p := NewPromParser([]byte(input), labels.NewSymbolTable())
i := 0 i := 0
var res labels.Labels var res labels.Labels
@ -304,7 +304,7 @@ choices}`, "strange©™\n'quoted' \"name\"", "6"),
}, },
} }
p := NewPromParser([]byte(input)) p := NewPromParser([]byte(input), labels.NewSymbolTable())
i := 0 i := 0
var res labels.Labels var res labels.Labels
@ -422,7 +422,7 @@ func TestPromParseErrors(t *testing.T) {
} }
for i, c := range cases { for i, c := range cases {
p := NewPromParser([]byte(c.input)) p := NewPromParser([]byte(c.input), labels.NewSymbolTable())
var err error var err error
for err == nil { for err == nil {
_, err = p.Next() _, err = p.Next()
@ -476,7 +476,7 @@ func TestPromNullByteHandling(t *testing.T) {
} }
for i, c := range cases { for i, c := range cases {
p := NewPromParser([]byte(c.input)) p := NewPromParser([]byte(c.input), labels.NewSymbolTable())
var err error var err error
for err == nil { for err == nil {
_, err = p.Next() _, err = p.Next()
@ -497,7 +497,7 @@ const (
) )
func BenchmarkParse(b *testing.B) { func BenchmarkParse(b *testing.B) {
for parserName, parser := range map[string]func([]byte) Parser{ for parserName, parser := range map[string]func([]byte, *labels.SymbolTable) Parser{
"prometheus": NewPromParser, "prometheus": NewPromParser,
"openmetrics": NewOpenMetricsParser, "openmetrics": NewOpenMetricsParser,
} { } {
@ -516,8 +516,9 @@ func BenchmarkParse(b *testing.B) {
b.ReportAllocs() b.ReportAllocs()
b.ResetTimer() b.ResetTimer()
st := labels.NewSymbolTable()
for i := 0; i < b.N; i += promtestdataSampleCount { for i := 0; i < b.N; i += promtestdataSampleCount {
p := parser(buf) p := parser(buf, st)
Outer: Outer:
for i < b.N { for i < b.N {
@ -544,8 +545,9 @@ func BenchmarkParse(b *testing.B) {
b.ReportAllocs() b.ReportAllocs()
b.ResetTimer() b.ResetTimer()
st := labels.NewSymbolTable()
for i := 0; i < b.N; i += promtestdataSampleCount { for i := 0; i < b.N; i += promtestdataSampleCount {
p := parser(buf) p := parser(buf, st)
Outer: Outer:
for i < b.N { for i < b.N {
@ -577,8 +579,9 @@ func BenchmarkParse(b *testing.B) {
b.ReportAllocs() b.ReportAllocs()
b.ResetTimer() b.ResetTimer()
st := labels.NewSymbolTable()
for i := 0; i < b.N; i += promtestdataSampleCount { for i := 0; i < b.N; i += promtestdataSampleCount {
p := parser(buf) p := parser(buf, st)
Outer: Outer:
for i < b.N { for i < b.N {

View file

@ -80,13 +80,14 @@ type ProtobufParser struct {
} }
// NewProtobufParser returns a parser for the payload in the byte slice. // NewProtobufParser returns a parser for the payload in the byte slice.
func NewProtobufParser(b []byte, parseClassicHistograms bool) Parser { func NewProtobufParser(b []byte, parseClassicHistograms bool, st *labels.SymbolTable) Parser {
return &ProtobufParser{ return &ProtobufParser{
in: b, in: b,
state: EntryInvalid, state: EntryInvalid,
mf: &dto.MetricFamily{}, mf: &dto.MetricFamily{},
metricBytes: &bytes.Buffer{}, metricBytes: &bytes.Buffer{},
parseClassicHistograms: parseClassicHistograms, parseClassicHistograms: parseClassicHistograms,
builder: labels.NewScratchBuilderWithSymbolTable(st, 16),
} }
} }

View file

@ -743,7 +743,7 @@ func TestProtobufParse(t *testing.T) {
}{ }{
{ {
name: "ignore classic buckets of native histograms", name: "ignore classic buckets of native histograms",
parser: NewProtobufParser(inputBuf.Bytes(), false), parser: NewProtobufParser(inputBuf.Bytes(), false, labels.NewSymbolTable()),
expected: []parseResult{ expected: []parseResult{
{ {
m: "go_build_info", m: "go_build_info",
@ -1280,7 +1280,7 @@ func TestProtobufParse(t *testing.T) {
}, },
{ {
name: "parse classic and native buckets", name: "parse classic and native buckets",
parser: NewProtobufParser(inputBuf.Bytes(), true), parser: NewProtobufParser(inputBuf.Bytes(), true, labels.NewSymbolTable()),
expected: []parseResult{ expected: []parseResult{
{ // 0 { // 0
m: "go_build_info", m: "go_build_info",

View file

@ -391,7 +391,7 @@ func TestFederationWithNativeHistograms(t *testing.T) {
body, err := io.ReadAll(res.Body) body, err := io.ReadAll(res.Body)
require.NoError(t, err) require.NoError(t, err)
p := textparse.NewProtobufParser(body, false) p := textparse.NewProtobufParser(body, false, labels.NewSymbolTable())
var actVec promql.Vector var actVec promql.Vector
metricFamilies := 0 metricFamilies := 0
l := labels.Labels{} l := labels.Labels{}