prometheus/tsdb/hashcache/series_hash_cache.go

package hashcache

import (
	"sync"

	"go.uber.org/atomic"

	"github.com/prometheus/prometheus/storage"
)

const (
	numGenerations = 4

	// approxBytesPerEntry is the estimated memory footprint (in bytes) of 1 cache
	// entry, measured with TestSeriesHashCache_MeasureApproximateSizePerEntry().
	approxBytesPerEntry = 28
)
// SeriesHashCache is a bounded cache that maps each per-block series ID to
// its labels hash.
type SeriesHashCache struct {
	maxEntriesPerGeneration uint64

	generationsMx sync.RWMutex
	generations   [numGenerations]cacheGeneration
}

// NewSeriesHashCache returns a SeriesHashCache capped at approximately maxBytes
// of memory, spread evenly across numGenerations generations.
func NewSeriesHashCache(maxBytes uint64) *SeriesHashCache {
	maxEntriesPerGeneration := maxBytes / approxBytesPerEntry / numGenerations
	if maxEntriesPerGeneration < 1 {
		maxEntriesPerGeneration = 1
	}

	c := &SeriesHashCache{maxEntriesPerGeneration: maxEntriesPerGeneration}

	// Init generations.
	for idx := 0; idx < numGenerations; idx++ {
		c.generations[idx].blocks = &sync.Map{}
		c.generations[idx].length = atomic.NewUint64(0)
	}

	return c
}
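
// A rough sizing sketch, purely illustrative (the 350 MiB figure below is an
// assumption, not a recommendation): with maxBytes = 350*1024*1024, each of the
// 4 generations is capped at 350*1024*1024 / 28 / 4 = 3,276,800 entries, so
// generations rotate roughly every ~3.3 million insertions.
//
//	cache := hashcache.NewSeriesHashCache(350 * 1024 * 1024)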

// GetBlockCache returns a reference to the series hash cache for the provided blockID.
// The returned cache reference should be retained only for a short period (i.e. the
// duration of a single query execution).
func (c *SeriesHashCache) GetBlockCache(blockID string) *BlockSeriesHashCache {
	blockCache := &BlockSeriesHashCache{}

	c.generationsMx.RLock()
	defer c.generationsMx.RUnlock()

	// Trigger a garbage collection if the current generation reached the max size.
	if c.generations[0].length.Load() >= c.maxEntriesPerGeneration {
		c.generationsMx.RUnlock()
		c.gc()
		c.generationsMx.RLock()
	}

	for idx := 0; idx < numGenerations; idx++ {
		gen := c.generations[idx]

		if value, ok := gen.blocks.Load(blockID); ok {
			blockCache.generations[idx] = value.(*blockCacheGeneration)
			continue
		}

		// Create a new per-block cache only for the current generation.
		// If the per-block cache for an older generation doesn't exist,
		// its entry is left nil and skipped when reading.
		if idx == 0 {
			value, _ := gen.blocks.LoadOrStore(blockID, newBlockCacheGeneration(gen.length))
			blockCache.generations[idx] = value.(*blockCacheGeneration)
		}
	}

	return blockCache
}
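
// A hedged usage sketch (the blockID, seriesRef and lset variables and the
// labels hashing call below are assumptions for illustration; they are not part
// of this package): a caller fetches the per-block view once per query, then
// consults it while iterating series, computing and backfilling hashes on miss.
//
//	blockCache := cache.GetBlockCache(blockID)
//	hash, ok := blockCache.Fetch(seriesRef)
//	if !ok {
//		hash = lset.Hash() // e.g. labels.Labels.Hash() from prometheus/model/labels
//		blockCache.Store(seriesRef, hash)
//	}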

// GetBlockCacheProvider returns a cache provider bound to the provided blockID.
func (c *SeriesHashCache) GetBlockCacheProvider(blockID string) *BlockSeriesHashCacheProvider {
	return NewBlockSeriesHashCacheProvider(c, blockID)
}

// gc rotates generations: the current generation becomes an old one, the oldest
// generation is dropped, and a new empty current generation is initialised.
func (c *SeriesHashCache) gc() {
	c.generationsMx.Lock()
	defer c.generationsMx.Unlock()

	// Make sure no other goroutine has already GCed the current generation.
	if c.generations[0].length.Load() < c.maxEntriesPerGeneration {
		return
	}

	// Shift each generation to the next (older) slot; the oldest one is dropped.
	for idx := numGenerations - 2; idx >= 0; idx-- {
		c.generations[idx+1] = c.generations[idx]
	}

	// Initialise a new empty current generation.
	c.generations[0] = cacheGeneration{
		blocks: &sync.Map{},
		length: atomic.NewUint64(0),
	}
}
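
// To illustrate the rotation (a descriptive note, not additional behaviour):
// after gc(), entries previously reachable via generations[0] are served from
// generations[1], while the maps that were held by the oldest generation are no
// longer referenced by the cache and become eligible for Go's garbage collector
// once any outstanding BlockSeriesHashCache references to them are dropped.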

// cacheGeneration holds a multi-block cache generation.
type cacheGeneration struct {
	// blocks maps a block ID to its *blockCacheGeneration.
	blocks *sync.Map

	// Keeps track of the number of items added to the cache. This counter
	// is passed to each blockCacheGeneration belonging to this generation.
	length *atomic.Uint64
}

// blockCacheGeneration holds a per-block cache generation.
type blockCacheGeneration struct {
	// hashes maps a per-block series ID to its labels hash.
	hashesMx sync.RWMutex
	hashes   map[storage.SeriesRef]uint64

	// Keeps track of the number of items added to the cache. This counter is
	// shared with all blockCacheGenerations in the "parent" cacheGeneration.
	length *atomic.Uint64
}

func newBlockCacheGeneration(length *atomic.Uint64) *blockCacheGeneration {
	return &blockCacheGeneration{
		hashes: make(map[storage.SeriesRef]uint64),
		length: length,
	}
}

// BlockSeriesHashCache is a per-block view over the SeriesHashCache generations,
// returned by SeriesHashCache.GetBlockCache().
type BlockSeriesHashCache struct {
	generations [numGenerations]*blockCacheGeneration
}

// Fetch returns the hash of the given seriesID from the cache, and a boolean
// indicating whether the series was found in the cache or not.
func (c *BlockSeriesHashCache) Fetch(seriesID storage.SeriesRef) (uint64, bool) {
	// Look for it in all generations, starting from the most recent one (index 0).
	for idx := 0; idx < numGenerations; idx++ {
		gen := c.generations[idx]

		// Skip if the cache doesn't exist for this generation.
		if gen == nil {
			continue
		}

		gen.hashesMx.RLock()
		value, ok := gen.hashes[seriesID]
		gen.hashesMx.RUnlock()

		if ok {
			return value, true
		}
	}

	return 0, false
}

// Store stores the hash of the given seriesID in the cache.
func (c *BlockSeriesHashCache) Store(seriesID storage.SeriesRef, hash uint64) {
	// Store it in the most recent generation (index 0).
	gen := c.generations[0]

	gen.hashesMx.Lock()
	gen.hashes[seriesID] = hash
	gen.hashesMx.Unlock()

	gen.length.Add(1)
}
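
// Note (descriptive, not additional behaviour): the shared length counter is
// updated atomically after the map write, and generation rotation only happens
// on the next GetBlockCache call, so a generation may temporarily exceed
// maxEntriesPerGeneration. The bound is approximate, which is sufficient since
// the counter is only used to decide when to rotate generations.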

// BlockSeriesHashCacheProvider provides access to the series hash cache of a
// specific block.
type BlockSeriesHashCacheProvider struct {
	cache   *SeriesHashCache
	blockID string
}

// NewBlockSeriesHashCacheProvider makes a new BlockSeriesHashCacheProvider.
func NewBlockSeriesHashCacheProvider(cache *SeriesHashCache, blockID string) *BlockSeriesHashCacheProvider {
	return &BlockSeriesHashCacheProvider{
		cache:   cache,
		blockID: blockID,
	}
}

// SeriesHashCache returns a reference to the cache bound to the blockID provided
// to NewBlockSeriesHashCacheProvider().
func (p *BlockSeriesHashCacheProvider) SeriesHashCache() *BlockSeriesHashCache {
	return p.cache.GetBlockCache(p.blockID)
}
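
// A wiring sketch, under the assumption that a block reader only accepts a
// per-block provider rather than the whole cache (hypothetical caller code,
// not part of this package):
//
//	provider := cache.GetBlockCacheProvider(blockID)
//	// Later, typically once per query:
//	blockCache := provider.SeriesHashCache()
//	hash, found := blockCache.Fetch(seriesRef)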