Extract processWithBoundedParallelismAndConsistentWorkers

Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
This commit is contained in:
Oleg Zaytsev 2024-09-26 15:43:19 +02:00
parent ccd0308abc
commit 4fd2556baa
No known key found for this signature in database
GPG key ID: 7E9FE9FD48F512EF

View file

@ -300,52 +300,34 @@ func (p *MemPostings) Delete(deleted map[storage.SeriesRef]struct{}, affected ma
// Deleting label names mutates p.m map, so it should be done from a single goroutine after nobody else is reading it. // Deleting label names mutates p.m map, so it should be done from a single goroutine after nobody else is reading it.
deleteLabelNames := make(chan string, len(p.m)) deleteLabelNames := make(chan string, len(p.m))
process := func(l labels.Label) { process, wait := processWithBoundedParallelismAndConsistentWorkers(
orig := p.m[l.Name][l.Value] runtime.GOMAXPROCS(0),
repl := make([]storage.SeriesRef, 0, len(orig)) func(l labels.Label) uint64 { return xxhash.Sum64String(l.Name) },
for _, id := range orig { func(l labels.Label) {
if _, ok := deleted[id]; !ok { orig := p.m[l.Name][l.Value]
repl = append(repl, id) repl := make([]storage.SeriesRef, 0, len(orig))
for _, id := range orig {
if _, ok := deleted[id]; !ok {
repl = append(repl, id)
}
} }
} if len(repl) > 0 {
if len(repl) > 0 { p.m[l.Name][l.Value] = repl
p.m[l.Name][l.Value] = repl } else {
} else { delete(p.m[l.Name], l.Value)
delete(p.m[l.Name], l.Value) if len(p.m[l.Name]) == 0 {
if len(p.m[l.Name]) == 0 { // Delete the key if we removed all values.
// Delete the key if we removed all values. deleteLabelNames <- l.Name
deleteLabelNames <- l.Name }
} }
} },
} )
// Create GOMAXPROCS workers.
wg := sync.WaitGroup{}
jobs := make([]chan labels.Label, runtime.GOMAXPROCS(0))
for i := range jobs {
jobs[i] = make(chan labels.Label, 128)
wg.Add(1)
go func(jobs chan labels.Label) {
defer wg.Done()
for l := range jobs {
process(l)
}
}(jobs[i])
}
// Process all affected labels and the allPostingsKey.
for l := range affected { for l := range affected {
j := int(xxhash.Sum64String(l.Name) % uint64(len(jobs))) process(l)
jobs[j] <- l
} }
j := int(xxhash.Sum64String(allPostingsKey.Name) % uint64(len(jobs))) process(allPostingsKey)
jobs[j] <- allPostingsKey wait()
// Close jobs channels and wait all workers to finish.
for i := range jobs {
close(jobs[i])
}
wg.Wait()
// Close deleteLabelNames channel and delete the label names requested. // Close deleteLabelNames channel and delete the label names requested.
close(deleteLabelNames) close(deleteLabelNames)
@ -354,6 +336,35 @@ func (p *MemPostings) Delete(deleted map[storage.SeriesRef]struct{}, affected ma
} }
} }
// processWithBoundedParallelismAndConsistentWorkers runs f() across a fixed pool of
// workers, guaranteeing that two elements with the same hash(T) are always handled
// by the same worker (so per-key processing stays sequential and ordered).
// Feed jobs through process(); once all jobs are submitted, call wait() exactly once
// to close the queues and block until every job has been processed.
func processWithBoundedParallelismAndConsistentWorkers[T any](workers int, hash func(T) uint64, f func(T)) (process func(T), wait func()) {
	var wg sync.WaitGroup
	queues := make([]chan T, workers)
	wg.Add(workers)
	for w := range queues {
		// Buffered so that producers rarely block on a momentarily busy worker.
		queues[w] = make(chan T, 128)
		go func(in <-chan T) {
			defer wg.Done()
			for job := range in {
				f(job)
			}
		}(queues[w])
	}
	// A job's hash deterministically selects its queue, hence its worker.
	process = func(job T) {
		queues[hash(job)%uint64(workers)] <- job
	}
	// Closing every queue lets the range loops drain and exit; then wait for all workers.
	wait = func() {
		for _, q := range queues {
			close(q)
		}
		wg.Wait()
	}
	return process, wait
}
// Iter calls f for each postings list. It aborts if f returns an error and returns it. // Iter calls f for each postings list. It aborts if f returns an error and returns it.
func (p *MemPostings) Iter(f func(labels.Label, Postings) error) error { func (p *MemPostings) Iter(f func(labels.Label, Postings) error) error {
p.mtx.RLock() p.mtx.RLock()