prometheus/tsdb/encoding/encoding.go
Brian Brazil 48d25e6fe7 Reduce memory used by postings offset table.
Rather than keeping the offset of each postings list, instead
keep the nth offset of the offset of the posting list. As postings
list offsets have always been sorted, we can then get to the closest
entry before the one we want an iterate forwards.

I haven't done much tuning on the 32 number, it was chosen to try
not to read through more than a 4k page of data.

Switch to a bulk interface for fetching postings. Use it to avoid having
to re-read parts of the posting offset table when querying lots of it.

For a index with what BenchmarkHeadPostingForMatchers uses RAM
for r.postings drops from 3.79MB to 80.19kB or about 48x.
Bytes allocated go down by 30%, and suprisingly CPU usage drops by
4-6% for typical queries too.

benchmark                                                               old ns/op      new ns/op      delta
BenchmarkPostingsForMatchers/Block/n="1"-4                              35231          36673          +4.09%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4                      563380         540627         -4.04%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4                      536782         534186         -0.48%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4                     533990         541550         +1.42%
BenchmarkPostingsForMatchers/Block/i=~".*"-4                            113374598      117969608      +4.05%
BenchmarkPostingsForMatchers/Block/i=~".+"-4                            146329884      139651442      -4.56%
BenchmarkPostingsForMatchers/Block/i=~""-4                              50346510       44961127       -10.70%
BenchmarkPostingsForMatchers/Block/i!=""-4                              41261550       35356165       -14.31%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4              112544418      116904010      +3.87%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4       112487086      116864918      +3.89%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4                        41094758       35457904       -13.72%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4                41906372       36151473       -13.73%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4              147262414      140424800      -4.64%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4             28615629       27872072       -2.60%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4       147117177      140462403      -4.52%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4     175096826      167902298      -4.11%

benchmark                                                               old allocs     new allocs     delta
BenchmarkPostingsForMatchers/Block/n="1"-4                              4              6              +50.00%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4                      7              11             +57.14%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4                      7              11             +57.14%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4                     15             17             +13.33%
BenchmarkPostingsForMatchers/Block/i=~".*"-4                            100010         100012         +0.00%
BenchmarkPostingsForMatchers/Block/i=~".+"-4                            200069         200040         -0.01%
BenchmarkPostingsForMatchers/Block/i=~""-4                              200072         200045         -0.01%
BenchmarkPostingsForMatchers/Block/i!=""-4                              200070         200041         -0.01%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4              100013         100017         +0.00%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4       100017         100023         +0.01%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4                        200073         200046         -0.01%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4                200075         200050         -0.01%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4              200074         200049         -0.01%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4             111165         111150         -0.01%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4       200078         200055         -0.01%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4     311282         311238         -0.01%

benchmark                                                               old bytes     new bytes     delta
BenchmarkPostingsForMatchers/Block/n="1"-4                              264           296           +12.12%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4                      360           424           +17.78%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4                      360           424           +17.78%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4                     520           552           +6.15%
BenchmarkPostingsForMatchers/Block/i=~".*"-4                            1600461       1600482       +0.00%
BenchmarkPostingsForMatchers/Block/i=~".+"-4                            24900801      17259077      -30.69%
BenchmarkPostingsForMatchers/Block/i=~""-4                              24900836      17259151      -30.69%
BenchmarkPostingsForMatchers/Block/i!=""-4                              24900760      17259048      -30.69%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4              1600557       1600621       +0.00%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4       1600717       1600813       +0.01%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4                        24900856      17259176      -30.69%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4                24900952      17259304      -30.69%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4              24900993      17259333      -30.69%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4             3788311       3142630       -17.04%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4       24901137      17259509      -30.69%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4     28693086      20405680      -28.88%

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2019-12-11 19:59:31 +00:00

272 lines
6.6 KiB
Go

// Copyright 2018 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package encoding
import (
"encoding/binary"
"hash"
"hash/crc32"
"unsafe"
"github.com/pkg/errors"
)
var (
ErrInvalidSize = errors.New("invalid size")
ErrInvalidChecksum = errors.New("invalid checksum")
)
// Encbuf is a helper type to populate a byte slice with various types.
type Encbuf struct {
B []byte
C [binary.MaxVarintLen64]byte
}
func (e *Encbuf) Reset() { e.B = e.B[:0] }
func (e *Encbuf) Get() []byte { return e.B }
func (e *Encbuf) Len() int { return len(e.B) }
func (e *Encbuf) PutString(s string) { e.B = append(e.B, s...) }
func (e *Encbuf) PutByte(c byte) { e.B = append(e.B, c) }
func (e *Encbuf) PutBE32int(x int) { e.PutBE32(uint32(x)) }
func (e *Encbuf) PutUvarint32(x uint32) { e.PutUvarint64(uint64(x)) }
func (e *Encbuf) PutBE64int64(x int64) { e.PutBE64(uint64(x)) }
func (e *Encbuf) PutUvarint(x int) { e.PutUvarint64(uint64(x)) }
func (e *Encbuf) PutBE32(x uint32) {
binary.BigEndian.PutUint32(e.C[:], x)
e.B = append(e.B, e.C[:4]...)
}
func (e *Encbuf) PutBE64(x uint64) {
binary.BigEndian.PutUint64(e.C[:], x)
e.B = append(e.B, e.C[:8]...)
}
func (e *Encbuf) PutUvarint64(x uint64) {
n := binary.PutUvarint(e.C[:], x)
e.B = append(e.B, e.C[:n]...)
}
func (e *Encbuf) PutVarint64(x int64) {
n := binary.PutVarint(e.C[:], x)
e.B = append(e.B, e.C[:n]...)
}
// PutUvarintStr writes a string to the buffer prefixed by its varint length (in bytes!).
func (e *Encbuf) PutUvarintStr(s string) {
b := *(*[]byte)(unsafe.Pointer(&s))
e.PutUvarint(len(b))
e.PutString(s)
}
// PutHash appends a hash over the buffers current contents to the buffer.
func (e *Encbuf) PutHash(h hash.Hash) {
h.Reset()
e.WriteToHash(h)
e.PutHashSum(h)
}
// WriteToHash writes the current buffer contents to the given hash.
func (e *Encbuf) WriteToHash(h hash.Hash) {
_, err := h.Write(e.B)
if err != nil {
panic(err) // The CRC32 implementation does not error
}
}
// PutHashSum writes the Sum of the given hash to the buffer.
func (e *Encbuf) PutHashSum(h hash.Hash) {
e.B = h.Sum(e.B)
}
// Decbuf provides safe methods to extract data from a byte slice. It does all
// necessary bounds checking and advancing of the byte slice.
// Several datums can be extracted without checking for errors. However, before using
// any datum, the err() method must be checked.
type Decbuf struct {
B []byte
E error
}
// NewDecbufAt returns a new decoding buffer. It expects the first 4 bytes
// after offset to hold the big endian encoded content length, followed by the contents and the expected
// checksum.
func NewDecbufAt(bs ByteSlice, off int, castagnoliTable *crc32.Table) Decbuf {
if bs.Len() < off+4 {
return Decbuf{E: ErrInvalidSize}
}
b := bs.Range(off, off+4)
l := int(binary.BigEndian.Uint32(b))
if bs.Len() < off+4+l+4 {
return Decbuf{E: ErrInvalidSize}
}
// Load bytes holding the contents plus a CRC32 checksum.
b = bs.Range(off+4, off+4+l+4)
dec := Decbuf{B: b[:len(b)-4]}
if castagnoliTable != nil {
if exp := binary.BigEndian.Uint32(b[len(b)-4:]); dec.Crc32(castagnoliTable) != exp {
return Decbuf{E: ErrInvalidChecksum}
}
}
return dec
}
// NewDecbufUvarintAt returns a new decoding buffer. It expects the first bytes
// after offset to hold the uvarint-encoded buffers length, followed by the contents and the expected
// checksum.
func NewDecbufUvarintAt(bs ByteSlice, off int, castagnoliTable *crc32.Table) Decbuf {
// We never have to access this method at the far end of the byte slice. Thus just checking
// against the MaxVarintLen32 is sufficient.
if bs.Len() < off+binary.MaxVarintLen32 {
return Decbuf{E: ErrInvalidSize}
}
b := bs.Range(off, off+binary.MaxVarintLen32)
l, n := binary.Uvarint(b)
if n <= 0 || n > binary.MaxVarintLen32 {
return Decbuf{E: errors.Errorf("invalid uvarint %d", n)}
}
if bs.Len() < off+n+int(l)+4 {
return Decbuf{E: ErrInvalidSize}
}
// Load bytes holding the contents plus a CRC32 checksum.
b = bs.Range(off+n, off+n+int(l)+4)
dec := Decbuf{B: b[:len(b)-4]}
if dec.Crc32(castagnoliTable) != binary.BigEndian.Uint32(b[len(b)-4:]) {
return Decbuf{E: ErrInvalidChecksum}
}
return dec
}
func (d *Decbuf) Uvarint() int { return int(d.Uvarint64()) }
func (d *Decbuf) Be32int() int { return int(d.Be32()) }
func (d *Decbuf) Be64int64() int64 { return int64(d.Be64()) }
// Crc32 returns a CRC32 checksum over the remaining bytes.
func (d *Decbuf) Crc32(castagnoliTable *crc32.Table) uint32 {
return crc32.Checksum(d.B, castagnoliTable)
}
func (d *Decbuf) Skip(l int) {
if len(d.B) < l {
d.E = ErrInvalidSize
return
}
d.B = d.B[l:]
}
func (d *Decbuf) UvarintStr() string {
return string(d.UvarintBytes())
}
// The return value becomes invalid if the byte slice goes away.
// Compared to UvarintStr, this avoid allocations.
func (d *Decbuf) UvarintBytes() []byte {
l := d.Uvarint64()
if d.E != nil {
return []byte{}
}
if len(d.B) < int(l) {
d.E = ErrInvalidSize
return []byte{}
}
s := d.B[:l]
d.B = d.B[l:]
return s
}
func (d *Decbuf) Varint64() int64 {
if d.E != nil {
return 0
}
x, n := binary.Varint(d.B)
if n < 1 {
d.E = ErrInvalidSize
return 0
}
d.B = d.B[n:]
return x
}
func (d *Decbuf) Uvarint64() uint64 {
if d.E != nil {
return 0
}
x, n := binary.Uvarint(d.B)
if n < 1 {
d.E = ErrInvalidSize
return 0
}
d.B = d.B[n:]
return x
}
func (d *Decbuf) Be64() uint64 {
if d.E != nil {
return 0
}
if len(d.B) < 8 {
d.E = ErrInvalidSize
return 0
}
x := binary.BigEndian.Uint64(d.B)
d.B = d.B[8:]
return x
}
func (d *Decbuf) Be32() uint32 {
if d.E != nil {
return 0
}
if len(d.B) < 4 {
d.E = ErrInvalidSize
return 0
}
x := binary.BigEndian.Uint32(d.B)
d.B = d.B[4:]
return x
}
func (d *Decbuf) Byte() byte {
if d.E != nil {
return 0
}
if len(d.B) < 1 {
d.E = ErrInvalidSize
return 0
}
x := d.B[0]
d.B = d.B[1:]
return x
}
func (d *Decbuf) Err() error { return d.E }
func (d *Decbuf) Len() int { return len(d.B) }
func (d *Decbuf) Get() []byte { return d.B }
// ByteSlice abstracts a byte slice.
type ByteSlice interface {
Len() int
Range(start, end int) []byte
}