From 0584610080ad5e57f4ec8847d8d406bd753962f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Pazos?= Date: Thu, 9 Nov 2023 15:03:18 -0300 Subject: [PATCH] Use two uint32 to encode (offset,leng) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolás Pazos --- prompb/types.pb.go | 73 +++++++++++++++++++++++++-------- prompb/types.proto | 6 +-- storage/remote/codec.go | 19 +++++---- storage/remote/queue_manager.go | 22 ++++++---- 4 files changed, 83 insertions(+), 37 deletions(-) diff --git a/prompb/types.pb.go b/prompb/types.pb.go index cb3e27c24..ff0b3fc51 100644 --- a/prompb/types.pb.go +++ b/prompb/types.pb.go @@ -992,7 +992,7 @@ func (m *ReducedTimeSeries) GetMetadata() MetricMetadataRef { type MinimizedTimeSeries struct { // Sorted list of label name-value pair references. This list is always even. At even indices // there's the reference to label name, while at odd indices there's the reference to label value. - LabelSymbols []uint32 `protobuf:"fixed32,1,rep,packed,name=label_symbols,json=labelSymbols,proto3" json:"label_symbols,omitempty"` + LabelSymbols []uint32 `protobuf:"varint,1,rep,packed,name=label_symbols,json=labelSymbols,proto3" json:"label_symbols,omitempty"` // Sorted by time, oldest sample first. 
// TODO: support references for other types Samples []Sample `protobuf:"bytes,2,rep,name=samples,proto3" json:"samples"` @@ -1596,7 +1596,7 @@ var fileDescriptor_d938547f84707355 = []byte{ 0x37, 0x93, 0xbe, 0xf4, 0xaa, 0xa4, 0xf3, 0x68, 0xef, 0x28, 0x6f, 0xf2, 0x05, 0x58, 0x7e, 0x3a, 0xe5, 0xb0, 0xdd, 0x94, 0xf7, 0xaf, 0xfe, 0xed, 0x1c, 0x4c, 0x5d, 0x2c, 0x8d, 0xdc, 0x3f, 0x35, 0x38, 0xdf, 0xf6, 0x02, 0xcf, 0xf7, 0x9e, 0xad, 0x95, 0xee, 0x43, 0xa8, 0x62, 0x41, 0x06, 0x62, - 0xe1, 0x9f, 0x85, 0x69, 0x05, 0x4b, 0xa9, 0x79, 0x05, 0x45, 0xbd, 0x44, 0xf2, 0x5f, 0x7a, 0x26, + 0xe1, 0x9f, 0x85, 0x69, 0x05, 0xab, 0xa9, 0x79, 0x05, 0x45, 0xbd, 0x44, 0xf2, 0x5f, 0x7a, 0x26, 0x87, 0x60, 0x65, 0xd7, 0xae, 0xa6, 0xbc, 0xda, 0x8b, 0x70, 0xca, 0xe3, 0x68, 0xa0, 0x25, 0x45, 0x2b, 0xd1, 0xff, 0xc0, 0xc6, 0x59, 0x83, 0x32, 0x1d, 0x65, 0x16, 0x32, 0xd4, 0x0a, 0x70, 0x13, 0x8a, 0xe8, 0x43, 0x2d, 0x49, 0xb8, 0x58, 0x69, 0xc9, 0x92, 0xa4, 0xce, 0xeb, 0x23, 0xcb, 0x4e, @@ -1619,7 +1619,7 @@ var fileDescriptor_d938547f84707355 = []byte{ 0xb0, 0xb2, 0x4e, 0xba, 0xdf, 0x42, 0x15, 0x23, 0xf2, 0xd1, 0xdb, 0x76, 0xf9, 0x1b, 0x60, 0x0e, 0x95, 0x87, 0xec, 0xeb, 0xdd, 0xde, 0xc8, 0x26, 0x33, 0x48, 0xd4, 0x0e, 0x77, 0x9e, 0xbf, 0xd8, 0xd5, 0x7e, 0x79, 0xb1, 0xab, 0xfd, 0xfe, 0x62, 0x57, 0xfb, 0xc6, 0x54, 0xda, 0xd1, 0xd9, 0x99, - 0x89, 0x7f, 0x32, 0x9f, 0xfc, 0x15, 0x00, 0x00, 0xff, 0xff, 0xd7, 0x35, 0x2e, 0x7a, 0xfa, 0x0c, + 0x89, 0x7f, 0x32, 0x9f, 0xfc, 0x15, 0x00, 0x00, 0xff, 0xff, 0x1c, 0x7b, 0xd7, 0x1b, 0xfa, 0x0c, 0x00, 0x00, } @@ -2348,11 +2348,20 @@ func (m *MinimizedTimeSeries) MarshalToSizedBuffer(dAtA []byte) (int, error) { } } if len(m.LabelSymbols) > 0 { - for iNdEx := len(m.LabelSymbols) - 1; iNdEx >= 0; iNdEx-- { - i -= 4 - encoding_binary.LittleEndian.PutUint32(dAtA[i:], uint32(m.LabelSymbols[iNdEx])) + dAtA11 := make([]byte, len(m.LabelSymbols)*10) + var j10 int + for _, num := range m.LabelSymbols { + for num >= 1<<7 { + dAtA11[j10] = uint8(uint64(num)&0x7f | 0x80) + num >>= 7 + 
j10++ + } + dAtA11[j10] = uint8(num) + j10++ } - i = encodeVarintTypes(dAtA, i, uint64(len(m.LabelSymbols)*4)) + i -= j10 + copy(dAtA[i:], dAtA11[:j10]) + i = encodeVarintTypes(dAtA, i, uint64(j10)) i-- dAtA[i] = 0xa } @@ -3026,7 +3035,11 @@ func (m *MinimizedTimeSeries) Size() (n int) { var l int _ = l if len(m.LabelSymbols) > 0 { - n += 1 + sovTypes(uint64(len(m.LabelSymbols)*4)) + len(m.LabelSymbols)*4 + l = 0 + for _, e := range m.LabelSymbols { + l += sovTypes(uint64(e)) + } + n += 1 + sovTypes(uint64(l)) + l } if len(m.Samples) > 0 { for _, e := range m.Samples { @@ -4862,13 +4875,22 @@ func (m *MinimizedTimeSeries) Unmarshal(dAtA []byte) error { } switch fieldNum { case 1: - if wireType == 5 { + if wireType == 0 { var v uint32 - if (iNdEx + 4) > l { - return io.ErrUnexpectedEOF + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTypes + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= uint32(b&0x7F) << shift + if b < 0x80 { + break + } } - v = uint32(encoding_binary.LittleEndian.Uint32(dAtA[iNdEx:])) - iNdEx += 4 m.LabelSymbols = append(m.LabelSymbols, v) } else if wireType == 2 { var packedLen int @@ -4897,17 +4919,32 @@ func (m *MinimizedTimeSeries) Unmarshal(dAtA []byte) error { return io.ErrUnexpectedEOF } var elementCount int - elementCount = packedLen / 4 + var count int + for _, integer := range dAtA[iNdEx:postIndex] { + if integer < 128 { + count++ + } + } + elementCount = count if elementCount != 0 && len(m.LabelSymbols) == 0 { m.LabelSymbols = make([]uint32, 0, elementCount) } for iNdEx < postIndex { var v uint32 - if (iNdEx + 4) > l { - return io.ErrUnexpectedEOF + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTypes + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= uint32(b&0x7F) << shift + if b < 0x80 { + break + } } - v = uint32(encoding_binary.LittleEndian.Uint32(dAtA[iNdEx:])) - iNdEx += 4 
m.LabelSymbols = append(m.LabelSymbols, v) } } else { diff --git a/prompb/types.proto b/prompb/types.proto index 3d857cda4..012a59682 100644 --- a/prompb/types.proto +++ b/prompb/types.proto @@ -169,9 +169,9 @@ message ReducedTimeSeries { // based on an experiment by marco message MinimizedTimeSeries { - // Sorted list of label name-value pair references. This list is always even. At even indices - // there's the reference to label name, while at odd indices there's the reference to label value. - repeated fixed32 label_symbols = 1 [(gogoproto.nullable) = false]; + // Sorted list of label name-value pair references. This list's length is always a multiple of 4, + // packing tuples of (label name offset, label name length, label value offset, label value length). + repeated uint32 label_symbols = 1 [(gogoproto.nullable) = false]; // Sorted by time, oldest sample first. // TODO: support references for other types diff --git a/storage/remote/codec.go b/storage/remote/codec.go index 4ab881bf0..16a9c5f14 100644 --- a/storage/remote/codec.go +++ b/storage/remote/codec.go @@ -820,8 +820,10 @@ func labelsToLabelRefsProto(lbls labels.Labels, pool *lookupPool, buf []prompb.L func labelsToUint32Slice(lbls labels.Labels, symbolTable *rwSymbolTable, buf []uint32) []uint32 { result := buf[:0] lbls.Range(func(l labels.Label) { - result = append(result, symbolTable.Ref(l.Name)) - result = append(result, symbolTable.Ref(l.Value)) + off, leng := symbolTable.Ref(l.Name) + result = append(result, off, leng) + off, leng = symbolTable.Ref(l.Value) + result = append(result, off, leng) }) return result } @@ -832,15 +834,18 @@ func Uint32RefToLabels(symbols string, minLabels []uint32) labels.Labels { labelIdx := 0 for labelIdx < len(minLabels) { // todo, check for overflow? - offset, length := unpackRef(minLabels[labelIdx]) - + offset := minLabels[labelIdx] + labelIdx++ + length := minLabels[labelIdx] + labelIdx++ name := symbols[offset : offset+length] // todo, check for overflow? 
- offset, length = unpackRef(minLabels[labelIdx+1]) - + offset = minLabels[labelIdx] + labelIdx++ + length = minLabels[labelIdx] + labelIdx++ value := symbols[offset : offset+length] ls.Add(name, value) - labelIdx += 2 } return ls.Labels() diff --git a/storage/remote/queue_manager.go b/storage/remote/queue_manager.go index ec4456527..35f311c1d 100644 --- a/storage/remote/queue_manager.go +++ b/storage/remote/queue_manager.go @@ -1870,26 +1870,30 @@ func buildReducedWriteRequest(samples []prompb.ReducedTimeSeries, labels map[uin return compressed, highest, nil } +type offLenPair struct { + Off uint32 + Len uint32 +} + type rwSymbolTable struct { symbols strings.Builder - symbolsMap map[string]uint32 + symbolsMap map[string]offLenPair } func newRwSymbolTable() rwSymbolTable { return rwSymbolTable{ - symbolsMap: make(map[string]uint32), + symbolsMap: make(map[string]offLenPair), } } -func (r *rwSymbolTable) Ref(str string) uint32 { - // todo, check for overflowing the uint32 based on documented format? - if ref, ok := r.symbolsMap[str]; ok { - return ref +func (r *rwSymbolTable) Ref(str string) (off uint32, leng uint32) { + if offlen, ok := r.symbolsMap[str]; ok { + return offlen.Off, offlen.Len } - r.symbolsMap[str] = packRef(r.symbols.Len(), len(str)) + off, leng = uint32(r.symbols.Len()), uint32(len(str)) r.symbols.WriteString(str) - - return r.symbolsMap[str] + r.symbolsMap[str] = offLenPair{off, leng} + return } func (r *rwSymbolTable) LabelsString() string {