2021-08-17 06:31:08 -07:00
|
|
|
package hashcache
|
|
|
|
|
|
|
|
import (
|
|
|
|
"sync"
|
|
|
|
|
|
|
|
"go.uber.org/atomic"
|
2021-11-18 06:46:46 -08:00
|
|
|
|
|
|
|
"github.com/prometheus/prometheus/storage"
|
2021-08-17 06:31:08 -07:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
|
|
|
|
numGenerations = 4
|
|
|
|
|
|
|
|
// approxBytesPerEntry is the estimated memory footprint (in bytes) of 1 cache
|
|
|
|
// entry, measured with TestSeriesHashCache_MeasureApproximateSizePerEntry().
|
|
|
|
approxBytesPerEntry = 28
|
|
|
|
)
|
|
|
|
|
|
|
|
// SeriesHashCache is a bounded cache mapping the per-block series ID with
|
|
|
|
// its labels hash.
|
|
|
|
type SeriesHashCache struct {
|
|
|
|
maxEntriesPerGeneration uint64
|
|
|
|
|
|
|
|
generationsMx sync.RWMutex
|
|
|
|
generations [numGenerations]cacheGeneration
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewSeriesHashCache(maxBytes uint64) *SeriesHashCache {
|
|
|
|
maxEntriesPerGeneration := maxBytes / approxBytesPerEntry / numGenerations
|
|
|
|
if maxEntriesPerGeneration < 1 {
|
|
|
|
maxEntriesPerGeneration = 1
|
|
|
|
}
|
|
|
|
|
|
|
|
c := &SeriesHashCache{maxEntriesPerGeneration: maxEntriesPerGeneration}
|
|
|
|
|
|
|
|
// Init generations.
|
|
|
|
for idx := 0; idx < numGenerations; idx++ {
|
|
|
|
c.generations[idx].blocks = &sync.Map{}
|
|
|
|
c.generations[idx].length = atomic.NewUint64(0)
|
|
|
|
}
|
|
|
|
|
|
|
|
return c
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetBlockCache returns a reference to the series hash cache for the provided blockID.
|
|
|
|
// The returned cache reference should be retained only for a short period (ie. the duration
|
|
|
|
// of the execution of 1 single query).
|
|
|
|
func (c *SeriesHashCache) GetBlockCache(blockID string) *BlockSeriesHashCache {
|
|
|
|
blockCache := &BlockSeriesHashCache{}
|
|
|
|
|
|
|
|
c.generationsMx.RLock()
|
|
|
|
defer c.generationsMx.RUnlock()
|
|
|
|
|
|
|
|
// Trigger a garbage collection if the current generation reached the max size.
|
|
|
|
if c.generations[0].length.Load() >= c.maxEntriesPerGeneration {
|
|
|
|
c.generationsMx.RUnlock()
|
|
|
|
c.gc()
|
|
|
|
c.generationsMx.RLock()
|
|
|
|
}
|
|
|
|
|
|
|
|
for idx := 0; idx < numGenerations; idx++ {
|
|
|
|
gen := c.generations[idx]
|
|
|
|
|
|
|
|
if value, ok := gen.blocks.Load(blockID); ok {
|
|
|
|
blockCache.generations[idx] = value.(*blockCacheGeneration)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create a new per-block cache only for the current generation.
|
|
|
|
// If the cache for the older generation doesn't exist, then its
|
|
|
|
// value will be null and skipped when reading.
|
|
|
|
if idx == 0 {
|
|
|
|
value, _ := gen.blocks.LoadOrStore(blockID, newBlockCacheGeneration(gen.length))
|
|
|
|
blockCache.generations[idx] = value.(*blockCacheGeneration)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return blockCache
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetBlockCacheProvider returns a cache provider bounded to the provided blockID.
|
|
|
|
func (c *SeriesHashCache) GetBlockCacheProvider(blockID string) *BlockSeriesHashCacheProvider {
|
|
|
|
return NewBlockSeriesHashCacheProvider(c, blockID)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *SeriesHashCache) gc() {
|
|
|
|
c.generationsMx.Lock()
|
|
|
|
defer c.generationsMx.Unlock()
|
|
|
|
|
|
|
|
// Make sure no other goroutines already GCed the current generation.
|
|
|
|
if c.generations[0].length.Load() < c.maxEntriesPerGeneration {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Shift the current generation to old.
|
|
|
|
for idx := numGenerations - 2; idx >= 0; idx-- {
|
|
|
|
c.generations[idx+1] = c.generations[idx]
|
|
|
|
}
|
|
|
|
|
|
|
|
// Initialise a new empty current generation.
|
|
|
|
c.generations[0] = cacheGeneration{
|
|
|
|
blocks: &sync.Map{},
|
|
|
|
length: atomic.NewUint64(0),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// cacheGeneration holds a multi-blocks cache generation.
|
|
|
|
type cacheGeneration struct {
|
|
|
|
// blocks maps the block ID with blockCacheGeneration.
|
|
|
|
blocks *sync.Map
|
|
|
|
|
|
|
|
// Keeps track of the number of items added to the cache. This counter
|
|
|
|
// is passed to each blockCacheGeneration belonging to this generation.
|
|
|
|
length *atomic.Uint64
|
|
|
|
}
|
|
|
|
|
|
|
|
// blockCacheGeneration holds a per-block cache generation.
|
|
|
|
type blockCacheGeneration struct {
|
|
|
|
// hashes maps per-block series ID with its hash.
|
|
|
|
hashesMx sync.RWMutex
|
2021-11-18 06:46:46 -08:00
|
|
|
hashes map[storage.SeriesRef]uint64
|
2021-08-17 06:31:08 -07:00
|
|
|
|
|
|
|
// Keeps track of the number of items added to the cache. This counter is
|
|
|
|
// shared with all blockCacheGeneration in the "parent" cacheGeneration.
|
|
|
|
length *atomic.Uint64
|
|
|
|
}
|
|
|
|
|
|
|
|
func newBlockCacheGeneration(length *atomic.Uint64) *blockCacheGeneration {
|
|
|
|
return &blockCacheGeneration{
|
2021-11-18 06:46:46 -08:00
|
|
|
hashes: make(map[storage.SeriesRef]uint64),
|
2021-08-17 06:31:08 -07:00
|
|
|
length: length,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
type BlockSeriesHashCache struct {
|
|
|
|
generations [numGenerations]*blockCacheGeneration
|
|
|
|
}
|
|
|
|
|
|
|
|
// Fetch the hash of the given seriesID from the cache and returns a boolean
|
|
|
|
// whether the series was found in the cache or not.
|
2021-11-18 06:46:46 -08:00
|
|
|
func (c *BlockSeriesHashCache) Fetch(seriesID storage.SeriesRef) (uint64, bool) {
|
2021-08-17 06:31:08 -07:00
|
|
|
// Look for it in all generations, starting from the most recent one (index 0).
|
|
|
|
for idx := 0; idx < numGenerations; idx++ {
|
|
|
|
gen := c.generations[idx]
|
|
|
|
|
|
|
|
// Skip if the cache doesn't exist for this generation.
|
|
|
|
if gen == nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
gen.hashesMx.RLock()
|
|
|
|
value, ok := gen.hashes[seriesID]
|
|
|
|
gen.hashesMx.RUnlock()
|
|
|
|
|
|
|
|
if ok {
|
|
|
|
return value, true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0, false
|
|
|
|
}
|
|
|
|
|
|
|
|
// Store the hash of the given seriesID in the cache.
|
2021-11-18 06:46:46 -08:00
|
|
|
func (c *BlockSeriesHashCache) Store(seriesID storage.SeriesRef, hash uint64) {
|
2021-08-17 06:31:08 -07:00
|
|
|
// Store it in the most recent generation (index 0).
|
|
|
|
gen := c.generations[0]
|
|
|
|
|
|
|
|
gen.hashesMx.Lock()
|
|
|
|
gen.hashes[seriesID] = hash
|
|
|
|
gen.hashesMx.Unlock()
|
|
|
|
|
|
|
|
gen.length.Add(1)
|
|
|
|
}
|
|
|
|
|
|
|
|
type BlockSeriesHashCacheProvider struct {
|
|
|
|
cache *SeriesHashCache
|
|
|
|
blockID string
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewBlockSeriesHashCacheProvider makes a new BlockSeriesHashCacheProvider.
|
|
|
|
func NewBlockSeriesHashCacheProvider(cache *SeriesHashCache, blockID string) *BlockSeriesHashCacheProvider {
|
|
|
|
return &BlockSeriesHashCacheProvider{
|
|
|
|
cache: cache,
|
|
|
|
blockID: blockID,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// SeriesHashCache returns a reference to the cache bounded to block provided
|
|
|
|
// to NewBlockSeriesHashCacheProvider().
|
|
|
|
func (p *BlockSeriesHashCacheProvider) SeriesHashCache() *BlockSeriesHashCache {
|
|
|
|
return p.cache.GetBlockCache(p.blockID)
|
|
|
|
}
|