prometheus/tsdb/symbols_batch.go

381 lines
7.8 KiB
Go
Raw Normal View History

package tsdb
import (
"container/heap"
"encoding/gob"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"sync"
"github.com/golang/snappy"
"github.com/prometheus/prometheus/tsdb/errors"
)
// symbolFlushers writes symbols to provided files in background goroutines.
type symbolFlushers struct {
jobs chan flusherJob
wg sync.WaitGroup
closed bool
errMu sync.Mutex
err error
pool *sync.Pool
}
func newSymbolFlushers(concurrency int) *symbolFlushers {
f := &symbolFlushers{
jobs: make(chan flusherJob),
pool: &sync.Pool{},
}
for i := 0; i < concurrency; i++ {
f.wg.Add(1)
go f.loop()
}
return f
}
func (f *symbolFlushers) flushSymbols(outputFile string, symbols map[string]struct{}) error {
if len(symbols) == 0 {
return fmt.Errorf("no symbols")
}
f.errMu.Lock()
err := f.err
f.errMu.Unlock()
// If there was any error previously, return it.
if err != nil {
return err
}
f.jobs <- flusherJob{
outputFile: outputFile,
symbols: symbols,
}
return nil
}
func (f *symbolFlushers) loop() {
defer f.wg.Done()
for j := range f.jobs {
var sortedSymbols []string
pooled := f.pool.Get()
if pooled == nil {
sortedSymbols = make([]string, 0, len(j.symbols))
} else {
sortedSymbols = pooled.([]string)
sortedSymbols = sortedSymbols[:0]
}
for s := range j.symbols {
sortedSymbols = append(sortedSymbols, s)
}
sort.Strings(sortedSymbols)
err := writeSymbolsToFile(j.outputFile, sortedSymbols)
sortedSymbols = sortedSymbols[:0]
//nolint:staticcheck // Ignore SA6002 safe to ignore and actually fixing it has some performance penalty.
f.pool.Put(sortedSymbols)
if err != nil {
f.errMu.Lock()
if f.err == nil {
f.err = err
}
f.errMu.Unlock()
break
}
}
for range f.jobs { //nolint:revive // This "empty" block is intentional
// drain the channel, don't do more flushing. only used when error occurs.
}
}
// Stops and waits until all flusher goroutines are finished.
func (f *symbolFlushers) close() error {
if f.closed {
return f.err
}
f.closed = true
close(f.jobs)
f.wg.Wait()
return f.err
}
type flusherJob struct {
outputFile string
symbols map[string]struct{}
}
// symbolsBatcher keeps buffer of symbols in memory. Once the buffer reaches the size limit (number of symbols),
// batcher writes currently buffered symbols to file. At the end remaining symbols must be flushed. After writing
// all batches, symbolsBatcher has list of files that can be used together with newSymbolsIterator to iterate
// through all previously added symbols in sorted order.
type symbolsBatcher struct {
dir string
limit int
symbolsFiles []string // paths of symbol files, which were sent to flushers for flushing
buffer map[string]struct{} // using map to deduplicate
flushers *symbolFlushers
}
func newSymbolsBatcher(limit int, dir string, flushers *symbolFlushers) *symbolsBatcher {
return &symbolsBatcher{
limit: limit,
dir: dir,
buffer: make(map[string]struct{}, limit),
flushers: flushers,
}
}
func (sw *symbolsBatcher) addSymbol(sym string) error {
sw.buffer[sym] = struct{}{}
return sw.flushSymbols(false)
}
func (sw *symbolsBatcher) flushSymbols(force bool) error {
if !force && len(sw.buffer) < sw.limit {
return nil
}
if len(sw.buffer) == 0 {
return nil
}
symbolsFile := filepath.Join(sw.dir, fmt.Sprintf("symbols_%d", len(sw.symbolsFiles)))
sw.symbolsFiles = append(sw.symbolsFiles, symbolsFile)
buf := sw.buffer
sw.buffer = make(map[string]struct{}, sw.limit)
return sw.flushers.flushSymbols(symbolsFile, buf)
}
// getSymbolFiles returns list of symbol files used to flush symbols to. These files are only valid if flushers
// finish successfully.
func (sw *symbolsBatcher) getSymbolFiles() []string {
return sw.symbolsFiles
}
func writeSymbolsToFile(filename string, symbols []string) error {
f, err := os.Create(filename)
if err != nil {
return err
}
2021-10-06 02:04:37 -07:00
// Snappy is used for buffering and to create smaller files.
sn := snappy.NewBufferedWriter(f)
enc := gob.NewEncoder(sn)
errs := errors.NewMulti()
for _, s := range symbols {
err := enc.Encode(s)
if err != nil {
errs.Add(err)
break
}
}
errs.Add(sn.Close())
errs.Add(f.Close())
return errs.Err()
}
2021-10-06 02:04:37 -07:00
// Implements heap.Interface using symbols from files.
type symbolsHeap []*symbolsFile
2021-10-06 02:04:37 -07:00
// Len implements sort.Interface.
func (s *symbolsHeap) Len() int {
return len(*s)
}
2021-10-06 02:04:37 -07:00
// Less implements sort.Interface.
func (s *symbolsHeap) Less(i, j int) bool {
iw, ierr := (*s)[i].Peek()
if ierr != nil {
2021-10-06 02:04:37 -07:00
// Empty string will be sorted first, so error will be returned before any other result.
iw = ""
}
jw, jerr := (*s)[j].Peek()
if jerr != nil {
jw = ""
}
return iw < jw
}
2021-10-06 02:04:37 -07:00
// Swap implements sort.Interface.
func (s *symbolsHeap) Swap(i, j int) {
(*s)[i], (*s)[j] = (*s)[j], (*s)[i]
}
2021-10-06 02:04:37 -07:00
// Push implements heap.Interface. Push should add x as element Len().
func (s *symbolsHeap) Push(x interface{}) {
2021-10-06 02:04:37 -07:00
*s = append(*s, x.(*symbolsFile))
}
2021-10-06 02:04:37 -07:00
// Pop implements heap.Interface. Pop should remove and return element Len() - 1.
func (s *symbolsHeap) Pop() interface{} {
l := len(*s)
res := (*s)[l-1]
*s = (*s)[:l-1]
return res
}
type symbolsIterator struct {
files []*os.File
heap symbolsHeap
// To avoid returning duplicates, we remember last returned symbol. We want to support "" as a valid
// symbol, so we use pointer to a string instead.
lastReturned *string
}
func newSymbolsIterator(filenames []string) (*symbolsIterator, error) {
files, err := openFiles(filenames)
if err != nil {
return nil, err
}
var symFiles []*symbolsFile
for _, f := range files {
symFiles = append(symFiles, newSymbolsFile(f))
}
h := &symbolsIterator{
files: files,
heap: symFiles,
}
heap.Init(&h.heap)
return h, nil
}
// NextSymbol advances iterator forward, and returns next symbol.
// If there is no next element, returns err == io.EOF.
func (sit *symbolsIterator) NextSymbol() (string, error) {
2021-10-06 02:04:37 -07:00
for len(sit.heap) > 0 {
result, err := sit.heap[0].Next()
if err == io.EOF {
// End of file, remove it from heap, and try next file.
heap.Remove(&sit.heap, 0)
continue
}
2021-10-06 02:04:37 -07:00
if err != nil {
return "", err
}
2021-10-06 02:04:37 -07:00
heap.Fix(&sit.heap, 0)
2021-10-06 02:04:37 -07:00
if sit.lastReturned != nil && *sit.lastReturned == result {
// Duplicate symbol, try next one.
continue
}
sit.lastReturned = &result
return result, nil
}
2021-10-06 02:04:37 -07:00
return "", io.EOF
}
// Close all files.
func (sit *symbolsIterator) Close() error {
errs := errors.NewMulti()
for _, f := range sit.files {
errs.Add(f.Close())
}
return errs.Err()
}
type symbolsFile struct {
dec *gob.Decoder
nextValid bool // if true, nextSymbol and nextErr have the next symbol (possibly "")
nextSymbol string
nextErr error
}
func newSymbolsFile(f *os.File) *symbolsFile {
sn := snappy.NewReader(f)
dec := gob.NewDecoder(sn)
return &symbolsFile{
dec: dec,
}
}
// Peek returns next symbol or error, but also preserves them for subsequent Peek or Next calls.
func (sf *symbolsFile) Peek() (string, error) {
if sf.nextValid {
return sf.nextSymbol, sf.nextErr
}
sf.nextValid = true
sf.nextSymbol, sf.nextErr = sf.readNext()
return sf.nextSymbol, sf.nextErr
}
2021-10-06 02:04:37 -07:00
// Next advances iterator and returns the next symbol or error.
func (sf *symbolsFile) Next() (string, error) {
if sf.nextValid {
defer func() {
sf.nextValid = false
sf.nextSymbol = ""
sf.nextErr = nil
}()
return sf.nextSymbol, sf.nextErr
}
return sf.readNext()
}
func (sf *symbolsFile) readNext() (string, error) {
var s string
err := sf.dec.Decode(&s)
// Decode returns io.EOF at the end.
if err != nil {
return "", err
}
return s, nil
}
func openFiles(filenames []string) ([]*os.File, error) {
var result []*os.File
for _, fn := range filenames {
f, err := os.Open(fn)
if err != nil {
2021-10-06 02:04:37 -07:00
// Close files opened so far.
for _, sf := range result {
_ = sf.Close()
}
return nil, err
}
result = append(result, f)
}
return result, nil
}