2021-10-05 07:06:38 -07:00
|
|
|
package tsdb
|
|
|
|
|
|
|
|
import (
|
|
|
|
"container/heap"
|
|
|
|
"encoding/gob"
|
2024-01-05 04:12:23 -08:00
|
|
|
"errors"
|
2021-10-05 07:06:38 -07:00
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"os"
|
|
|
|
"path/filepath"
|
|
|
|
"sort"
|
2021-12-02 01:34:52 -08:00
|
|
|
"sync"
|
2021-10-05 07:06:38 -07:00
|
|
|
|
|
|
|
"github.com/golang/snappy"
|
|
|
|
|
2024-01-05 04:12:23 -08:00
|
|
|
tsdb_errors "github.com/prometheus/prometheus/tsdb/errors"
|
2021-10-05 07:06:38 -07:00
|
|
|
)
|
|
|
|
|
2021-12-02 01:34:52 -08:00
|
|
|
// symbolFlushers writes symbols to provided files in background goroutines.
|
|
|
|
type symbolFlushers struct {
|
|
|
|
jobs chan flusherJob
|
|
|
|
wg sync.WaitGroup
|
|
|
|
|
|
|
|
closed bool
|
|
|
|
|
|
|
|
errMu sync.Mutex
|
|
|
|
err error
|
|
|
|
|
|
|
|
pool *sync.Pool
|
|
|
|
}
|
|
|
|
|
|
|
|
func newSymbolFlushers(concurrency int) *symbolFlushers {
|
|
|
|
f := &symbolFlushers{
|
|
|
|
jobs: make(chan flusherJob),
|
|
|
|
pool: &sync.Pool{},
|
|
|
|
}
|
|
|
|
|
|
|
|
for i := 0; i < concurrency; i++ {
|
|
|
|
f.wg.Add(1)
|
|
|
|
go f.loop()
|
|
|
|
}
|
|
|
|
|
|
|
|
return f
|
|
|
|
}
|
|
|
|
|
|
|
|
func (f *symbolFlushers) flushSymbols(outputFile string, symbols map[string]struct{}) error {
|
|
|
|
if len(symbols) == 0 {
|
|
|
|
return fmt.Errorf("no symbols")
|
|
|
|
}
|
|
|
|
|
|
|
|
f.errMu.Lock()
|
|
|
|
err := f.err
|
|
|
|
f.errMu.Unlock()
|
|
|
|
|
|
|
|
// If there was any error previously, return it.
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
f.jobs <- flusherJob{
|
|
|
|
outputFile: outputFile,
|
|
|
|
symbols: symbols,
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (f *symbolFlushers) loop() {
|
|
|
|
defer f.wg.Done()
|
|
|
|
|
|
|
|
for j := range f.jobs {
|
|
|
|
var sortedSymbols []string
|
|
|
|
|
|
|
|
pooled := f.pool.Get()
|
|
|
|
if pooled == nil {
|
|
|
|
sortedSymbols = make([]string, 0, len(j.symbols))
|
|
|
|
} else {
|
|
|
|
sortedSymbols = pooled.([]string)
|
|
|
|
sortedSymbols = sortedSymbols[:0]
|
|
|
|
}
|
|
|
|
|
|
|
|
for s := range j.symbols {
|
|
|
|
sortedSymbols = append(sortedSymbols, s)
|
|
|
|
}
|
|
|
|
sort.Strings(sortedSymbols)
|
|
|
|
|
|
|
|
err := writeSymbolsToFile(j.outputFile, sortedSymbols)
|
|
|
|
sortedSymbols = sortedSymbols[:0]
|
|
|
|
|
|
|
|
//nolint:staticcheck // Ignore SA6002 safe to ignore and actually fixing it has some performance penalty.
|
|
|
|
f.pool.Put(sortedSymbols)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
f.errMu.Lock()
|
|
|
|
if f.err == nil {
|
|
|
|
f.err = err
|
|
|
|
}
|
|
|
|
f.errMu.Unlock()
|
|
|
|
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-11-01 06:25:51 -07:00
|
|
|
for range f.jobs {
|
2021-12-02 01:34:52 -08:00
|
|
|
// drain the channel, don't do more flushing. only used when error occurs.
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Stops and waits until all flusher goroutines are finished.
|
|
|
|
func (f *symbolFlushers) close() error {
|
|
|
|
if f.closed {
|
|
|
|
return f.err
|
|
|
|
}
|
|
|
|
|
|
|
|
f.closed = true
|
|
|
|
close(f.jobs)
|
|
|
|
f.wg.Wait()
|
|
|
|
|
|
|
|
return f.err
|
|
|
|
}
|
|
|
|
|
|
|
|
// flusherJob is one unit of work for a flusher goroutine.
type flusherJob struct {
	// outputFile is the path of the file the symbols are written to.
	outputFile string
	// symbols is the set of symbols to write; the worker sorts it before writing.
	symbols map[string]struct{}
}
|
|
|
|
|
2021-10-05 07:06:38 -07:00
|
|
|
// symbolsBatcher keeps buffer of symbols in memory. Once the buffer reaches the size limit (number of symbols),
|
|
|
|
// batcher writes currently buffered symbols to file. At the end remaining symbols must be flushed. After writing
|
|
|
|
// all batches, symbolsBatcher has list of files that can be used together with newSymbolsIterator to iterate
|
|
|
|
// through all previously added symbols in sorted order.
|
|
|
|
type symbolsBatcher struct {
|
|
|
|
dir string
|
|
|
|
limit int
|
|
|
|
|
2021-12-02 01:34:52 -08:00
|
|
|
symbolsFiles []string // paths of symbol files, which were sent to flushers for flushing
|
|
|
|
|
|
|
|
buffer map[string]struct{} // using map to deduplicate
|
|
|
|
flushers *symbolFlushers
|
2021-10-05 07:06:38 -07:00
|
|
|
}
|
|
|
|
|
2021-12-02 01:34:52 -08:00
|
|
|
func newSymbolsBatcher(limit int, dir string, flushers *symbolFlushers) *symbolsBatcher {
|
2021-10-05 07:06:38 -07:00
|
|
|
return &symbolsBatcher{
|
2021-12-02 01:34:52 -08:00
|
|
|
limit: limit,
|
|
|
|
dir: dir,
|
|
|
|
buffer: make(map[string]struct{}, limit),
|
|
|
|
flushers: flushers,
|
2021-10-05 07:06:38 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sw *symbolsBatcher) flushSymbols(force bool) error {
|
|
|
|
if !force && len(sw.buffer) < sw.limit {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-12-02 01:34:52 -08:00
|
|
|
if len(sw.buffer) == 0 {
|
|
|
|
return nil
|
2021-10-05 07:06:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
symbolsFile := filepath.Join(sw.dir, fmt.Sprintf("symbols_%d", len(sw.symbolsFiles)))
|
2021-12-02 01:34:52 -08:00
|
|
|
sw.symbolsFiles = append(sw.symbolsFiles, symbolsFile)
|
2021-10-05 07:06:38 -07:00
|
|
|
|
2021-12-02 01:34:52 -08:00
|
|
|
buf := sw.buffer
|
|
|
|
sw.buffer = make(map[string]struct{}, sw.limit)
|
|
|
|
return sw.flushers.flushSymbols(symbolsFile, buf)
|
2021-10-05 07:06:38 -07:00
|
|
|
}
|
|
|
|
|
2021-12-02 01:34:52 -08:00
|
|
|
// getSymbolFiles returns list of symbol files used to flush symbols to. These files are only valid if flushers
|
|
|
|
// finish successfully.
|
|
|
|
func (sw *symbolsBatcher) getSymbolFiles() []string {
|
2021-10-05 07:06:38 -07:00
|
|
|
return sw.symbolsFiles
|
|
|
|
}
|
|
|
|
|
|
|
|
func writeSymbolsToFile(filename string, symbols []string) error {
|
|
|
|
f, err := os.Create(filename)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2021-10-06 02:04:37 -07:00
|
|
|
// Snappy is used for buffering and to create smaller files.
|
2021-10-05 07:06:38 -07:00
|
|
|
sn := snappy.NewBufferedWriter(f)
|
|
|
|
enc := gob.NewEncoder(sn)
|
|
|
|
|
2024-01-05 04:12:23 -08:00
|
|
|
errs := tsdb_errors.NewMulti()
|
2021-10-05 07:06:38 -07:00
|
|
|
|
|
|
|
for _, s := range symbols {
|
|
|
|
err := enc.Encode(s)
|
|
|
|
if err != nil {
|
|
|
|
errs.Add(err)
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
errs.Add(sn.Close())
|
|
|
|
errs.Add(f.Close())
|
|
|
|
return errs.Err()
|
|
|
|
}
|
|
|
|
|
2021-10-06 02:04:37 -07:00
|
|
|
// Implements heap.Interface using symbols from files.
|
2021-10-05 07:06:38 -07:00
|
|
|
type symbolsHeap []*symbolsFile
|
|
|
|
|
2021-10-06 02:04:37 -07:00
|
|
|
// Len implements sort.Interface.
|
2021-10-05 07:06:38 -07:00
|
|
|
func (s *symbolsHeap) Len() int {
|
|
|
|
return len(*s)
|
|
|
|
}
|
|
|
|
|
2021-10-06 02:04:37 -07:00
|
|
|
// Less implements sort.Interface.
|
2021-10-05 07:06:38 -07:00
|
|
|
func (s *symbolsHeap) Less(i, j int) bool {
|
|
|
|
iw, ierr := (*s)[i].Peek()
|
|
|
|
if ierr != nil {
|
2021-10-06 02:04:37 -07:00
|
|
|
// Empty string will be sorted first, so error will be returned before any other result.
|
2021-10-05 07:06:38 -07:00
|
|
|
iw = ""
|
|
|
|
}
|
|
|
|
|
|
|
|
jw, jerr := (*s)[j].Peek()
|
|
|
|
if jerr != nil {
|
|
|
|
jw = ""
|
|
|
|
}
|
|
|
|
|
|
|
|
return iw < jw
|
|
|
|
}
|
|
|
|
|
2021-10-06 02:04:37 -07:00
|
|
|
// Swap implements sort.Interface.
|
2021-10-05 07:06:38 -07:00
|
|
|
func (s *symbolsHeap) Swap(i, j int) {
|
|
|
|
(*s)[i], (*s)[j] = (*s)[j], (*s)[i]
|
|
|
|
}
|
|
|
|
|
2021-10-06 02:04:37 -07:00
|
|
|
// Push implements heap.Interface. Push should add x as element Len().
|
2021-10-05 07:06:38 -07:00
|
|
|
func (s *symbolsHeap) Push(x interface{}) {
|
2021-10-06 02:04:37 -07:00
|
|
|
*s = append(*s, x.(*symbolsFile))
|
2021-10-05 07:06:38 -07:00
|
|
|
}
|
|
|
|
|
2021-10-06 02:04:37 -07:00
|
|
|
// Pop implements heap.Interface. Pop should remove and return element Len() - 1.
|
2021-10-05 07:06:38 -07:00
|
|
|
func (s *symbolsHeap) Pop() interface{} {
|
|
|
|
l := len(*s)
|
|
|
|
res := (*s)[l-1]
|
|
|
|
*s = (*s)[:l-1]
|
|
|
|
return res
|
|
|
|
}
|
|
|
|
|
|
|
|
type symbolsIterator struct {
|
|
|
|
files []*os.File
|
|
|
|
heap symbolsHeap
|
|
|
|
|
|
|
|
// To avoid returning duplicates, we remember last returned symbol. We want to support "" as a valid
|
|
|
|
// symbol, so we use pointer to a string instead.
|
|
|
|
lastReturned *string
|
|
|
|
}
|
|
|
|
|
|
|
|
func newSymbolsIterator(filenames []string) (*symbolsIterator, error) {
|
|
|
|
files, err := openFiles(filenames)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
var symFiles []*symbolsFile
|
|
|
|
for _, f := range files {
|
|
|
|
symFiles = append(symFiles, newSymbolsFile(f))
|
|
|
|
}
|
|
|
|
|
|
|
|
h := &symbolsIterator{
|
|
|
|
files: files,
|
|
|
|
heap: symFiles,
|
|
|
|
}
|
|
|
|
|
|
|
|
heap.Init(&h.heap)
|
|
|
|
|
|
|
|
return h, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// NextSymbol advances iterator forward, and returns next symbol.
|
|
|
|
// If there is no next element, returns err == io.EOF.
|
|
|
|
func (sit *symbolsIterator) NextSymbol() (string, error) {
|
2021-10-06 02:04:37 -07:00
|
|
|
for len(sit.heap) > 0 {
|
|
|
|
result, err := sit.heap[0].Next()
|
2024-01-05 04:12:23 -08:00
|
|
|
if errors.Is(err, io.EOF) {
|
2021-10-06 02:04:37 -07:00
|
|
|
// End of file, remove it from heap, and try next file.
|
|
|
|
heap.Remove(&sit.heap, 0)
|
|
|
|
continue
|
|
|
|
}
|
2021-10-05 07:06:38 -07:00
|
|
|
|
2021-10-06 02:04:37 -07:00
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
2021-10-05 07:06:38 -07:00
|
|
|
|
2021-10-06 02:04:37 -07:00
|
|
|
heap.Fix(&sit.heap, 0)
|
2021-10-05 07:06:38 -07:00
|
|
|
|
2021-10-06 02:04:37 -07:00
|
|
|
if sit.lastReturned != nil && *sit.lastReturned == result {
|
|
|
|
// Duplicate symbol, try next one.
|
|
|
|
continue
|
|
|
|
}
|
2021-10-05 07:06:38 -07:00
|
|
|
|
|
|
|
sit.lastReturned = &result
|
|
|
|
return result, nil
|
|
|
|
}
|
|
|
|
|
2021-10-06 02:04:37 -07:00
|
|
|
return "", io.EOF
|
2021-10-05 07:06:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// Close all files.
|
|
|
|
func (sit *symbolsIterator) Close() error {
|
2024-01-05 04:12:23 -08:00
|
|
|
errs := tsdb_errors.NewMulti()
|
2021-10-05 07:06:38 -07:00
|
|
|
for _, f := range sit.files {
|
|
|
|
errs.Add(f.Close())
|
|
|
|
}
|
|
|
|
return errs.Err()
|
|
|
|
}
|
|
|
|
|
|
|
|
// symbolsFile reads symbols one at a time from a gob decoder and supports looking
// at the next symbol without consuming it.
type symbolsFile struct {
	// dec decodes the stored symbols.
	dec *gob.Decoder

	// nextValid reports whether nextSymbol and nextErr hold the result of a read
	// that has not been consumed yet (the symbol may be "").
	nextValid bool
	// nextSymbol is the buffered symbol, meaningful only while nextValid is true.
	nextSymbol string
	// nextErr is the buffered error, meaningful only while nextValid is true.
	nextErr error
}
|
|
|
|
|
|
|
|
func newSymbolsFile(f *os.File) *symbolsFile {
|
|
|
|
sn := snappy.NewReader(f)
|
|
|
|
dec := gob.NewDecoder(sn)
|
|
|
|
|
|
|
|
return &symbolsFile{
|
|
|
|
dec: dec,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Peek returns next symbol or error, but also preserves them for subsequent Peek or Next calls.
|
|
|
|
func (sf *symbolsFile) Peek() (string, error) {
|
|
|
|
if sf.nextValid {
|
|
|
|
return sf.nextSymbol, sf.nextErr
|
|
|
|
}
|
|
|
|
|
|
|
|
sf.nextValid = true
|
|
|
|
sf.nextSymbol, sf.nextErr = sf.readNext()
|
|
|
|
return sf.nextSymbol, sf.nextErr
|
|
|
|
}
|
|
|
|
|
2021-10-06 02:04:37 -07:00
|
|
|
// Next advances iterator and returns the next symbol or error.
|
2021-10-05 07:06:38 -07:00
|
|
|
func (sf *symbolsFile) Next() (string, error) {
|
|
|
|
if sf.nextValid {
|
|
|
|
defer func() {
|
|
|
|
sf.nextValid = false
|
|
|
|
sf.nextSymbol = ""
|
|
|
|
sf.nextErr = nil
|
|
|
|
}()
|
|
|
|
return sf.nextSymbol, sf.nextErr
|
|
|
|
}
|
|
|
|
|
|
|
|
return sf.readNext()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sf *symbolsFile) readNext() (string, error) {
|
|
|
|
var s string
|
|
|
|
err := sf.dec.Decode(&s)
|
|
|
|
// Decode returns io.EOF at the end.
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
return s, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// openFiles opens every named file for reading and returns the handles in the same
// order as filenames. If any file fails to open, the files opened so far are closed
// (errors from closing are ignored, as the open error is what gets reported) and
// a nil slice is returned together with that error.
func openFiles(filenames []string) ([]*os.File, error) {
	var opened []*os.File

	for i := range filenames {
		file, err := os.Open(filenames[i])
		if err == nil {
			opened = append(opened, file)
			continue
		}

		// Close files opened so far.
		for _, o := range opened {
			_ = o.Close()
		}
		return nil, err
	}

	return opened, nil
}
|