Use two uint32 values to encode (offset, length)

Signed-off-by: Nicolás Pazos <npazosmendez@gmail.com>
This commit is contained in:
Nicolás Pazos 2023-11-09 15:03:18 -03:00
parent 04fe4bfb20
commit 0584610080
4 changed files with 83 additions and 37 deletions

View file

@ -992,7 +992,7 @@ func (m *ReducedTimeSeries) GetMetadata() MetricMetadataRef {
type MinimizedTimeSeries struct {
// Sorted list of label name-value pair references. This list is always even. At even indices
// there's the reference to label name, while at odd indices there's the reference to label value.
LabelSymbols []uint32 `protobuf:"fixed32,1,rep,packed,name=label_symbols,json=labelSymbols,proto3" json:"label_symbols,omitempty"`
LabelSymbols []uint32 `protobuf:"varint,1,rep,packed,name=label_symbols,json=labelSymbols,proto3" json:"label_symbols,omitempty"`
// Sorted by time, oldest sample first.
// TODO: support references for other types
Samples []Sample `protobuf:"bytes,2,rep,name=samples,proto3" json:"samples"`
@ -1596,7 +1596,7 @@ var fileDescriptor_d938547f84707355 = []byte{
0x37, 0x93, 0xbe, 0xf4, 0xaa, 0xa4, 0xf3, 0x68, 0xef, 0x28, 0x6f, 0xf2, 0x05, 0x58, 0x7e, 0x3a,
0xe5, 0xb0, 0xdd, 0x94, 0xf7, 0xaf, 0xfe, 0xed, 0x1c, 0x4c, 0x5d, 0x2c, 0x8d, 0xdc, 0x3f, 0x35,
0x38, 0xdf, 0xf6, 0x02, 0xcf, 0xf7, 0x9e, 0xad, 0x95, 0xee, 0x43, 0xa8, 0x62, 0x41, 0x06, 0x62,
0xe1, 0x9f, 0x85, 0x69, 0x05, 0x4b, 0xa9, 0x79, 0x05, 0x45, 0xbd, 0x44, 0xf2, 0x5f, 0x7a, 0x26,
0xe1, 0x9f, 0x85, 0x69, 0x05, 0xab, 0xa9, 0x79, 0x05, 0x45, 0xbd, 0x44, 0xf2, 0x5f, 0x7a, 0x26,
0x87, 0x60, 0x65, 0xd7, 0xae, 0xa6, 0xbc, 0xda, 0x8b, 0x70, 0xca, 0xe3, 0x68, 0xa0, 0x25, 0x45,
0x2b, 0xd1, 0xff, 0xc0, 0xc6, 0x59, 0x83, 0x32, 0x1d, 0x65, 0x16, 0x32, 0xd4, 0x0a, 0x70, 0x13,
0x8a, 0xe8, 0x43, 0x2d, 0x49, 0xb8, 0x58, 0x69, 0xc9, 0x92, 0xa4, 0xce, 0xeb, 0x23, 0xcb, 0x4e,
@ -1619,7 +1619,7 @@ var fileDescriptor_d938547f84707355 = []byte{
0xb0, 0xb2, 0x4e, 0xba, 0xdf, 0x42, 0x15, 0x23, 0xf2, 0xd1, 0xdb, 0x76, 0xf9, 0x1b, 0x60, 0x0e,
0x95, 0x87, 0xec, 0xeb, 0xdd, 0xde, 0xc8, 0x26, 0x33, 0x48, 0xd4, 0x0e, 0x77, 0x9e, 0xbf, 0xd8,
0xd5, 0x7e, 0x79, 0xb1, 0xab, 0xfd, 0xfe, 0x62, 0x57, 0xfb, 0xc6, 0x54, 0xda, 0xd1, 0xd9, 0x99,
0x89, 0x7f, 0x32, 0x9f, 0xfc, 0x15, 0x00, 0x00, 0xff, 0xff, 0xd7, 0x35, 0x2e, 0x7a, 0xfa, 0x0c,
0x89, 0x7f, 0x32, 0x9f, 0xfc, 0x15, 0x00, 0x00, 0xff, 0xff, 0x1c, 0x7b, 0xd7, 0x1b, 0xfa, 0x0c,
0x00, 0x00,
}
@ -2348,11 +2348,20 @@ func (m *MinimizedTimeSeries) MarshalToSizedBuffer(dAtA []byte) (int, error) {
}
}
if len(m.LabelSymbols) > 0 {
for iNdEx := len(m.LabelSymbols) - 1; iNdEx >= 0; iNdEx-- {
i -= 4
encoding_binary.LittleEndian.PutUint32(dAtA[i:], uint32(m.LabelSymbols[iNdEx]))
dAtA11 := make([]byte, len(m.LabelSymbols)*10)
var j10 int
for _, num := range m.LabelSymbols {
for num >= 1<<7 {
dAtA11[j10] = uint8(uint64(num)&0x7f | 0x80)
num >>= 7
j10++
}
dAtA11[j10] = uint8(num)
j10++
}
i = encodeVarintTypes(dAtA, i, uint64(len(m.LabelSymbols)*4))
i -= j10
copy(dAtA[i:], dAtA11[:j10])
i = encodeVarintTypes(dAtA, i, uint64(j10))
i--
dAtA[i] = 0xa
}
@ -3026,7 +3035,11 @@ func (m *MinimizedTimeSeries) Size() (n int) {
var l int
_ = l
if len(m.LabelSymbols) > 0 {
n += 1 + sovTypes(uint64(len(m.LabelSymbols)*4)) + len(m.LabelSymbols)*4
l = 0
for _, e := range m.LabelSymbols {
l += sovTypes(uint64(e))
}
n += 1 + sovTypes(uint64(l)) + l
}
if len(m.Samples) > 0 {
for _, e := range m.Samples {
@ -4862,13 +4875,22 @@ func (m *MinimizedTimeSeries) Unmarshal(dAtA []byte) error {
}
switch fieldNum {
case 1:
if wireType == 5 {
if wireType == 0 {
var v uint32
if (iNdEx + 4) > l {
return io.ErrUnexpectedEOF
for shift := uint(0); ; shift += 7 {
if shift >= 64 {
return ErrIntOverflowTypes
}
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := dAtA[iNdEx]
iNdEx++
v |= uint32(b&0x7F) << shift
if b < 0x80 {
break
}
}
v = uint32(encoding_binary.LittleEndian.Uint32(dAtA[iNdEx:]))
iNdEx += 4
m.LabelSymbols = append(m.LabelSymbols, v)
} else if wireType == 2 {
var packedLen int
@ -4897,17 +4919,32 @@ func (m *MinimizedTimeSeries) Unmarshal(dAtA []byte) error {
return io.ErrUnexpectedEOF
}
var elementCount int
elementCount = packedLen / 4
var count int
for _, integer := range dAtA[iNdEx:postIndex] {
if integer < 128 {
count++
}
}
elementCount = count
if elementCount != 0 && len(m.LabelSymbols) == 0 {
m.LabelSymbols = make([]uint32, 0, elementCount)
}
for iNdEx < postIndex {
var v uint32
if (iNdEx + 4) > l {
return io.ErrUnexpectedEOF
for shift := uint(0); ; shift += 7 {
if shift >= 64 {
return ErrIntOverflowTypes
}
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := dAtA[iNdEx]
iNdEx++
v |= uint32(b&0x7F) << shift
if b < 0x80 {
break
}
}
v = uint32(encoding_binary.LittleEndian.Uint32(dAtA[iNdEx:]))
iNdEx += 4
m.LabelSymbols = append(m.LabelSymbols, v)
}
} else {

View file

@ -169,9 +169,9 @@ message ReducedTimeSeries {
// based on an experiment by marco
message MinimizedTimeSeries {
// Sorted list of label name-value pair references. This list is always even. At even indices
// there's the reference to label name, while at odd indices there's the reference to label value.
repeated fixed32 label_symbols = 1 [(gogoproto.nullable) = false];
// Sorted list of label name-value pair references. This list's length is always a multiple of 4,
// packing tuples of (label name offset, label name length, label value offset, label value length).
repeated uint32 label_symbols = 1 [(gogoproto.nullable) = false];
// Sorted by time, oldest sample first.
// TODO: support references for other types

View file

@ -820,8 +820,10 @@ func labelsToLabelRefsProto(lbls labels.Labels, pool *lookupPool, buf []prompb.L
// labelsToUint32Slice appends the symbol-table references for every label in
// lbls (name first, then value) to buf[:0] and returns the resulting slice.
// Symbols are interned in symbolTable through its Ref method.
func labelsToUint32Slice(lbls labels.Labels, symbolTable *rwSymbolTable, buf []uint32) []uint32 {
// Truncate instead of allocating so the capacity of the caller's buffer is reused.
result := buf[:0]
lbls.Range(func(l labels.Label) {
result = append(result, symbolTable.Ref(l.Name))
result = append(result, symbolTable.Ref(l.Value))
// Each symbol contributes two entries: its offset followed by its length.
off, leng := symbolTable.Ref(l.Name)
result = append(result, off, leng)
off, leng = symbolTable.Ref(l.Value)
result = append(result, off, leng)
})
return result
}
@ -832,15 +834,18 @@ func Uint32RefToLabels(symbols string, minLabels []uint32) labels.Labels {
labelIdx := 0
for labelIdx < len(minLabels) {
// todo, check for overflow?
offset, length := unpackRef(minLabels[labelIdx])
offset := minLabels[labelIdx]
labelIdx++
length := minLabels[labelIdx]
labelIdx++
name := symbols[offset : offset+length]
// todo, check for overflow?
offset, length = unpackRef(minLabels[labelIdx+1])
offset = minLabels[labelIdx]
labelIdx++
length = minLabels[labelIdx]
labelIdx++
value := symbols[offset : offset+length]
ls.Add(name, value)
labelIdx += 2
}
return ls.Labels()

View file

@ -1870,26 +1870,30 @@ func buildReducedWriteRequest(samples []prompb.ReducedTimeSeries, labels map[uin
return compressed, highest, nil
}
// offLenPair locates one interned string inside the symbol table's buffer:
// Off is the byte offset at which the string starts and Len is its length
// in bytes.
type offLenPair struct {
	Off, Len uint32
}
// rwSymbolTable interns strings: symbols accumulates the text of every distinct
// string passed to Ref, and symbolsMap remembers the reference handed out for
// each of them.
type rwSymbolTable struct {
// symbols is the concatenation of all interned strings, in first-seen order.
symbols strings.Builder
symbolsMap map[string]uint32
// Keyed by the interned string; the value locates that string inside symbols.
symbolsMap map[string]offLenPair
}
// newRwSymbolTable returns a table whose symbolsMap is initialized; the symbols
// builder is left at its zero value.
func newRwSymbolTable() rwSymbolTable {
return rwSymbolTable{
// The map must be allocated up front: Ref writes to it, and writing to a nil map panics.
symbolsMap: make(map[string]uint32),
symbolsMap: make(map[string]offLenPair),
}
}
// Ref interns str and returns its reference. A string already present in
// symbolsMap yields the stored reference; otherwise str is appended to symbols
// and its position is recorded for later calls.
func (r *rwSymbolTable) Ref(str string) uint32 {
// todo, check for overflowing the uint32 based on documented format?
if ref, ok := r.symbolsMap[str]; ok {
return ref
func (r *rwSymbolTable) Ref(str string) (off uint32, leng uint32) {
if offlen, ok := r.symbolsMap[str]; ok {
return offlen.Off, offlen.Len
}
r.symbolsMap[str] = packRef(r.symbols.Len(), len(str))
// The offset is read before WriteString, which grows symbols by len(str).
// NOTE(review): the uint32 conversions truncate silently if the builder or the
// string ever exceeds the uint32 range — confirm an upper bound is enforced.
off, leng = uint32(r.symbols.Len()), uint32(len(str))
r.symbols.WriteString(str)
return r.symbolsMap[str]
r.symbolsMap[str] = offLenPair{off, leng}
// Naked return: off and leng are the named results assigned above.
return
}
func (r *rwSymbolTable) LabelsString() string {