prometheus/tsdb/hashcache/series_hash_cache.go

package hashcache

import (
	"sync"

	"go.uber.org/atomic"

	"github.com/prometheus/prometheus/storage"
)

const (
	// numGenerations is the number of generations held by SeriesHashCache.
	// Index 0 is the current generation (the one new entries are stored in),
	// while higher indexes hold progressively older generations.
	numGenerations = 4

	// approxBytesPerEntry is the estimated memory footprint (in bytes) of 1 cache
	// entry, measured with TestSeriesHashCache_MeasureApproximateSizePerEntry().
	// It is used to convert the byte budget given to NewSeriesHashCache() into
	// a maximum number of entries.
	approxBytesPerEntry = 28
)

// SeriesHashCache is a bounded cache mapping the per-block series ID with
// its labels hash. Entries are grouped in generations: new entries are stored
// in the current generation (index 0) and, when GetBlockCache() finds it full,
// every generation is shifted by one position and the oldest one is dropped.
type SeriesHashCache struct {
	// maxEntriesPerGeneration is the entry count at which the current
	// generation is considered full and gets rotated.
	maxEntriesPerGeneration uint64

	// generationsMx guards the generations array: it is held for reading while
	// looking up generations and for writing while rotating them.
	generationsMx sync.RWMutex
	generations   [numGenerations]cacheGeneration
}

// NewSeriesHashCache makes a new SeriesHashCache whose memory footprint is
// approximately bounded to maxBytes. The budget is converted to a number of
// entries using approxBytesPerEntry and split evenly across the generations;
// each generation can always hold at least 1 entry.
func NewSeriesHashCache(maxBytes uint64) *SeriesHashCache {
	perGeneration := maxBytes / approxBytesPerEntry / numGenerations
	if perGeneration == 0 {
		// The budget is too small to fit a single entry per generation.
		perGeneration = 1
	}

	cache := &SeriesHashCache{maxEntriesPerGeneration: perGeneration}

	// Every generation starts empty, with its own entries counter.
	for idx := range cache.generations {
		cache.generations[idx] = cacheGeneration{
			blocks: &sync.Map{},
			length: atomic.NewUint64(0),
		}
	}

	return cache
}

// GetBlockCache returns a reference to the series hash cache for the provided blockID.
// The returned reference is a snapshot of the generations existing at call time, so it
// should be retained only for a short period (ie. the duration of the execution of
// 1 single query).
//
// If the current generation has reached its max size, the generations are rotated
// before the snapshot is taken.
func (c *SeriesHashCache) GetBlockCache(blockID string) *BlockSeriesHashCache {
	c.generationsMx.RLock()
	defer c.generationsMx.RUnlock()

	// gc() takes the write lock, so the read lock is released around the call
	// and re-acquired afterwards (the deferred unlock then releases it).
	if c.generations[0].length.Load() >= c.maxEntriesPerGeneration {
		c.generationsMx.RUnlock()
		c.gc()
		c.generationsMx.RLock()
	}

	result := &BlockSeriesHashCache{}

	for idx := range c.generations {
		generation := c.generations[idx]

		entry, found := generation.blocks.Load(blockID)

		switch {
		case found:
			// The block already has a cache in this generation.
		case idx == 0:
			// The per-block cache is created on demand only for the current
			// generation. LoadOrStore keeps whichever cache was stored first
			// in case another goroutine created it concurrently.
			entry, _ = generation.blocks.LoadOrStore(blockID, newBlockCacheGeneration(generation.length))
		default:
			// Older generations without a cache for this block are left nil
			// and skipped when reading.
			continue
		}

		result.generations[idx] = entry.(*blockCacheGeneration)
	}

	return result
}

// GetBlockCacheProvider returns a provider bound to this cache and to the
// given blockID, built with NewBlockSeriesHashCacheProvider().
func (c *SeriesHashCache) GetBlockCacheProvider(blockID string) *BlockSeriesHashCacheProvider {
	provider := NewBlockSeriesHashCacheProvider(c, blockID)
	return provider
}

// gc rotates the generations: each generation moves one position towards the
// end of the array (the oldest one is dropped) and a new empty generation
// becomes the current one. It takes the write lock on generationsMx, so the
// caller must not hold that lock when calling it.
func (c *SeriesHashCache) gc() {
	c.generationsMx.Lock()
	defer c.generationsMx.Unlock()

	// Re-check the size under the write lock: another goroutine may have
	// already rotated the generations while this one was waiting for the lock.
	if c.generations[0].length.Load() >= c.maxEntriesPerGeneration {
		// Shift every generation by one position; copy() handles the
		// overlapping source and destination.
		copy(c.generations[1:], c.generations[:numGenerations-1])

		// Start a new empty current generation.
		c.generations[0] = cacheGeneration{
			blocks: &sync.Map{},
			length: atomic.NewUint64(0),
		}
	}
}

// cacheGeneration holds a multi-blocks cache generation.
type cacheGeneration struct {
	// blocks maps the block ID (string) to the *blockCacheGeneration holding
	// the entries cached for that block in this generation.
	blocks *sync.Map

	// length keeps track of the number of items added to this generation.
	// The same counter is passed to each blockCacheGeneration belonging to
	// this generation, and is compared against the max entries per generation
	// to decide when to rotate.
	length *atomic.Uint64
}

// blockCacheGeneration holds a per-block cache generation.
type blockCacheGeneration struct {
	// hashesMx guards hashes, which maps the per-block series ID with its hash.
	hashesMx sync.RWMutex
	hashes   map[storage.SeriesRef]uint64

	// length keeps track of the number of items added to the cache. This counter
	// is shared with all blockCacheGeneration in the "parent" cacheGeneration.
	length *atomic.Uint64
}

// newBlockCacheGeneration makes an empty per-block cache generation which
// accounts its entries on the provided length counter.
func newBlockCacheGeneration(length *atomic.Uint64) *blockCacheGeneration {
	gen := new(blockCacheGeneration)
	gen.hashes = map[storage.SeriesRef]uint64{}
	gen.length = length

	return gen
}

// BlockSeriesHashCache is the view of the series hash cache for a single
// block, as returned by SeriesHashCache.GetBlockCache().
type BlockSeriesHashCache struct {
	// generations holds the per-block cache of each generation, ordered from
	// the most recent (index 0) to the oldest. An entry is nil if the block
	// had no cache in that generation when this view was created.
	generations [numGenerations]*blockCacheGeneration
}

// Fetch looks up the hash of the given seriesID and reports whether it was
// found. Generations are searched from the most recent one (index 0) to the
// oldest, and the first match wins.
func (c *BlockSeriesHashCache) Fetch(seriesID storage.SeriesRef) (uint64, bool) {
	for _, generation := range c.generations {
		if generation == nil {
			// The block has no cache in this generation.
			continue
		}

		generation.hashesMx.RLock()
		hash, found := generation.hashes[seriesID]
		generation.hashesMx.RUnlock()

		if found {
			return hash, true
		}
	}

	return 0, false
}

// Store the hash of the given seriesID in the cache. The entry is always
// written to the most recent generation (index 0). Storing a seriesID which
// is already present in that generation overwrites its hash without growing
// the generation's entries counter.
func (c *BlockSeriesHashCache) Store(seriesID storage.SeriesRef, hash uint64) {
	gen := c.generations[0]

	gen.hashesMx.Lock()
	_, exists := gen.hashes[seriesID]
	gen.hashes[seriesID] = hash
	gen.hashesMx.Unlock()

	// Only account for entries that actually grew the map. Counting overwrites
	// (eg. two concurrent queries storing the same series) would make the
	// generation look fuller than it is and trigger a premature rotation.
	if !exists {
		gen.length.Add(1)
	}
}

// BlockSeriesHashCacheProvider hands out the BlockSeriesHashCache of a fixed
// block, looking it up in the underlying SeriesHashCache on each request.
type BlockSeriesHashCacheProvider struct {
	cache   *SeriesHashCache
	blockID string
}

// NewBlockSeriesHashCacheProvider makes a new BlockSeriesHashCacheProvider
// serving the cache of the given blockID out of the provided SeriesHashCache.
func NewBlockSeriesHashCacheProvider(cache *SeriesHashCache, blockID string) *BlockSeriesHashCacheProvider {
	provider := new(BlockSeriesHashCacheProvider)
	provider.cache = cache
	provider.blockID = blockID

	return provider
}

// SeriesHashCache returns a reference to the cache of the block this provider
// was created for. The reference is looked up again on every call, via
// GetBlockCache() on the underlying cache.
func (p *BlockSeriesHashCacheProvider) SeriesHashCache() *BlockSeriesHashCache {
	blockCache := p.cache.GetBlockCache(p.blockID)
	return blockCache
}
Added series hash cache support to TSDB (#5) * Added series hash cache support to TSDB Signed-off-by: Marco Pracucci <marco@pracucci.com> * Fixed imports grouping Signed-off-by: Marco Pracucci <marco@pracucci.com> 2021-08-17 06:31:08 -07:00			`package hashcache`

			`import (`
			`"sync"`

			`"go.uber.org/atomic"`
Merge remote-tracking branch 'upstream/main' into merge-upstream 2021-11-18 06:46:46 -08:00
			`"github.com/prometheus/prometheus/storage"`
Added series hash cache support to TSDB (#5) * Added series hash cache support to TSDB Signed-off-by: Marco Pracucci <marco@pracucci.com> * Fixed imports grouping Signed-off-by: Marco Pracucci <marco@pracucci.com> 2021-08-17 06:31:08 -07:00			`)`

			`const (`
			`numGenerations = 4`

			`// approxBytesPerEntry is the estimated memory footprint (in bytes) of 1 cache`
			`// entry, measured with TestSeriesHashCache_MeasureApproximateSizePerEntry().`
			`approxBytesPerEntry = 28`
			`)`

			`// SeriesHashCache is a bounded cache mapping the per-block series ID with`
			`// its labels hash.`
			`type SeriesHashCache struct {`
			`maxEntriesPerGeneration uint64`

			`generationsMx sync.RWMutex`
			`generations [numGenerations]cacheGeneration`
			`}`

			`func NewSeriesHashCache(maxBytes uint64) *SeriesHashCache {`
			`maxEntriesPerGeneration := maxBytes / approxBytesPerEntry / numGenerations`
			`if maxEntriesPerGeneration < 1 {`
			`maxEntriesPerGeneration = 1`
			`}`

			`c := &SeriesHashCache{maxEntriesPerGeneration: maxEntriesPerGeneration}`

			`// Init generations.`
			`for idx := 0; idx < numGenerations; idx++ {`
			`c.generations[idx].blocks = &sync.Map{}`
			`c.generations[idx].length = atomic.NewUint64(0)`
			`}`

			`return c`
			`}`

			`// GetBlockCache returns a reference to the series hash cache for the provided blockID.`
			`// The returned cache reference should be retained only for a short period (ie. the duration`
			`// of the execution of 1 single query).`
			`func (c SeriesHashCache) GetBlockCache(blockID string) BlockSeriesHashCache {`
			`blockCache := &BlockSeriesHashCache{}`

			`c.generationsMx.RLock()`
			`defer c.generationsMx.RUnlock()`

			`// Trigger a garbage collection if the current generation reached the max size.`
			`if c.generations[0].length.Load() >= c.maxEntriesPerGeneration {`
			`c.generationsMx.RUnlock()`
			`c.gc()`
			`c.generationsMx.RLock()`
			`}`

			`for idx := 0; idx < numGenerations; idx++ {`
			`gen := c.generations[idx]`

			`if value, ok := gen.blocks.Load(blockID); ok {`
			`blockCache.generations[idx] = value.(*blockCacheGeneration)`
			`continue`
			`}`

			`// Create a new per-block cache only for the current generation.`
			`// If the cache for the older generation doesn't exist, then its`
			`// value will be null and skipped when reading.`
			`if idx == 0 {`
			`value, _ := gen.blocks.LoadOrStore(blockID, newBlockCacheGeneration(gen.length))`
			`blockCache.generations[idx] = value.(*blockCacheGeneration)`
			`}`
			`}`

			`return blockCache`
			`}`

			`// GetBlockCacheProvider returns a cache provider bounded to the provided blockID.`
			`func (c SeriesHashCache) GetBlockCacheProvider(blockID string) BlockSeriesHashCacheProvider {`
			`return NewBlockSeriesHashCacheProvider(c, blockID)`
			`}`

			`func (c *SeriesHashCache) gc() {`
			`c.generationsMx.Lock()`
			`defer c.generationsMx.Unlock()`

			`// Make sure no other goroutines already GCed the current generation.`
			`if c.generations[0].length.Load() < c.maxEntriesPerGeneration {`
			`return`
			`}`

			`// Shift the current generation to old.`
			`for idx := numGenerations - 2; idx >= 0; idx-- {`
			`c.generations[idx+1] = c.generations[idx]`
			`}`

			`// Initialise a new empty current generation.`
			`c.generations[0] = cacheGeneration{`
			`blocks: &sync.Map{},`
			`length: atomic.NewUint64(0),`
			`}`
			`}`

			`// cacheGeneration holds a multi-blocks cache generation.`
			`type cacheGeneration struct {`
			`// blocks maps the block ID with blockCacheGeneration.`
			`blocks *sync.Map`

			`// Keeps track of the number of items added to the cache. This counter`
			`// is passed to each blockCacheGeneration belonging to this generation.`
			`length *atomic.Uint64`
			`}`

			`// blockCacheGeneration holds a per-block cache generation.`
			`type blockCacheGeneration struct {`
			`// hashes maps per-block series ID with its hash.`
			`hashesMx sync.RWMutex`
Merge remote-tracking branch 'upstream/main' into merge-upstream 2021-11-18 06:46:46 -08:00			`hashes map[storage.SeriesRef]uint64`
Added series hash cache support to TSDB (#5) * Added series hash cache support to TSDB Signed-off-by: Marco Pracucci <marco@pracucci.com> * Fixed imports grouping Signed-off-by: Marco Pracucci <marco@pracucci.com> 2021-08-17 06:31:08 -07:00
			`// Keeps track of the number of items added to the cache. This counter is`
			`// shared with all blockCacheGeneration in the "parent" cacheGeneration.`
			`length *atomic.Uint64`
			`}`

			`func newBlockCacheGeneration(length atomic.Uint64) blockCacheGeneration {`
			`return &blockCacheGeneration{`
Merge remote-tracking branch 'upstream/main' into merge-upstream 2021-11-18 06:46:46 -08:00			`hashes: make(map[storage.SeriesRef]uint64),`
Added series hash cache support to TSDB (#5) * Added series hash cache support to TSDB Signed-off-by: Marco Pracucci <marco@pracucci.com> * Fixed imports grouping Signed-off-by: Marco Pracucci <marco@pracucci.com> 2021-08-17 06:31:08 -07:00			`length: length,`
			`}`
			`}`

			`type BlockSeriesHashCache struct {`
			`generations [numGenerations]*blockCacheGeneration`
			`}`

			`// Fetch the hash of the given seriesID from the cache and returns a boolean`
			`// whether the series was found in the cache or not.`
Merge remote-tracking branch 'upstream/main' into merge-upstream 2021-11-18 06:46:46 -08:00			`func (c *BlockSeriesHashCache) Fetch(seriesID storage.SeriesRef) (uint64, bool) {`
Added series hash cache support to TSDB (#5) * Added series hash cache support to TSDB Signed-off-by: Marco Pracucci <marco@pracucci.com> * Fixed imports grouping Signed-off-by: Marco Pracucci <marco@pracucci.com> 2021-08-17 06:31:08 -07:00			`// Look for it in all generations, starting from the most recent one (index 0).`
			`for idx := 0; idx < numGenerations; idx++ {`
			`gen := c.generations[idx]`

			`// Skip if the cache doesn't exist for this generation.`
			`if gen == nil {`
			`continue`
			`}`

			`gen.hashesMx.RLock()`
			`value, ok := gen.hashes[seriesID]`
			`gen.hashesMx.RUnlock()`

			`if ok {`
			`return value, true`
			`}`
			`}`

			`return 0, false`
			`}`

			`// Store the hash of the given seriesID in the cache.`
Merge remote-tracking branch 'upstream/main' into merge-upstream 2021-11-18 06:46:46 -08:00			`func (c *BlockSeriesHashCache) Store(seriesID storage.SeriesRef, hash uint64) {`
Added series hash cache support to TSDB (#5) * Added series hash cache support to TSDB Signed-off-by: Marco Pracucci <marco@pracucci.com> * Fixed imports grouping Signed-off-by: Marco Pracucci <marco@pracucci.com> 2021-08-17 06:31:08 -07:00			`// Store it in the most recent generation (index 0).`
			`gen := c.generations[0]`

			`gen.hashesMx.Lock()`
			`gen.hashes[seriesID] = hash`
			`gen.hashesMx.Unlock()`

			`gen.length.Add(1)`
			`}`

			`type BlockSeriesHashCacheProvider struct {`
			`cache *SeriesHashCache`
			`blockID string`
			`}`

			`// NewBlockSeriesHashCacheProvider makes a new BlockSeriesHashCacheProvider.`
			`func NewBlockSeriesHashCacheProvider(cache SeriesHashCache, blockID string) BlockSeriesHashCacheProvider {`
			`return &BlockSeriesHashCacheProvider{`
			`cache: cache,`
			`blockID: blockID,`
			`}`
			`}`

			`// SeriesHashCache returns a reference to the cache bounded to block provided`
			`// to NewBlockSeriesHashCacheProvider().`
			`func (p BlockSeriesHashCacheProvider) SeriesHashCache() BlockSeriesHashCache {`
			`return p.cache.GetBlockCache(p.blockID)`
			`}`