2021-08-03 05:14:26 -07:00
// Copyright 2021 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tsdb
import (
"context"
2023-11-16 10:54:41 -08:00
"errors"
"fmt"
2021-08-03 05:14:26 -07:00
"math"
2024-01-15 08:24:46 -08:00
"slices"
2022-09-15 00:52:09 -07:00
"sync"
2021-08-03 05:14:26 -07:00
"github.com/go-kit/log/level"
2021-11-08 06:23:17 -08:00
"github.com/prometheus/prometheus/model/labels"
2021-08-03 05:14:26 -07:00
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/tsdb/chunkenc"
"github.com/prometheus/prometheus/tsdb/chunks"
"github.com/prometheus/prometheus/tsdb/index"
)
func ( h * Head ) ExemplarQuerier ( ctx context . Context ) ( storage . ExemplarQuerier , error ) {
return h . exemplars . ExemplarQuerier ( ctx )
}
// Index returns an IndexReader against the block.
func ( h * Head ) Index ( ) ( IndexReader , error ) {
return h . indexRange ( math . MinInt64 , math . MaxInt64 ) , nil
}
func ( h * Head ) indexRange ( mint , maxt int64 ) * headIndexReader {
if hmin := h . MinTime ( ) ; hmin > mint {
mint = hmin
}
return & headIndexReader { head : h , mint : mint , maxt : maxt }
}
type headIndexReader struct {
head * Head
mint , maxt int64
}
func ( h * headIndexReader ) Close ( ) error {
return nil
}
func ( h * headIndexReader ) Symbols ( ) index . StringIter {
2021-09-08 02:18:48 -07:00
return h . head . postings . Symbols ( )
2021-08-03 05:14:26 -07:00
}
// SortedLabelValues returns label values present in the head for the
// specific label name that are within the time range mint to maxt.
// If matchers are specified the returned result set is reduced
// to label values of metrics matching the matchers.
2023-09-14 07:02:04 -07:00
func ( h * headIndexReader ) SortedLabelValues ( ctx context . Context , name string , matchers ... * labels . Matcher ) ( [ ] string , error ) {
values , err := h . LabelValues ( ctx , name , matchers ... )
2021-08-03 05:14:26 -07:00
if err == nil {
2022-09-30 07:33:56 -07:00
slices . Sort ( values )
2021-08-03 05:14:26 -07:00
}
return values , err
}
// LabelValues returns label values present in the head for the
// specific label name that are within the time range mint to maxt.
// If matchers are specified the returned result set is reduced
// to label values of metrics matching the matchers.
2023-09-14 07:02:04 -07:00
func ( h * headIndexReader ) LabelValues ( ctx context . Context , name string , matchers ... * labels . Matcher ) ( [ ] string , error ) {
2021-08-03 05:14:26 -07:00
if h . maxt < h . head . MinTime ( ) || h . mint > h . head . MaxTime ( ) {
return [ ] string { } , nil
}
if len ( matchers ) == 0 {
2023-09-14 07:02:04 -07:00
return h . head . postings . LabelValues ( ctx , name ) , nil
2021-08-03 05:14:26 -07:00
}
2023-09-14 07:02:04 -07:00
return labelValuesWithMatchers ( ctx , h , name , matchers ... )
2021-08-03 05:14:26 -07:00
}
// LabelNames returns all the unique label names present in the head
// that are within the time range mint to maxt.
2023-09-14 01:39:51 -07:00
func ( h * headIndexReader ) LabelNames ( ctx context . Context , matchers ... * labels . Matcher ) ( [ ] string , error ) {
2021-08-03 05:14:26 -07:00
if h . maxt < h . head . MinTime ( ) || h . mint > h . head . MaxTime ( ) {
return [ ] string { } , nil
}
if len ( matchers ) == 0 {
labelNames := h . head . postings . LabelNames ( )
2022-09-30 07:33:56 -07:00
slices . Sort ( labelNames )
2021-08-03 05:14:26 -07:00
return labelNames , nil
}
2023-09-14 01:39:51 -07:00
return labelNamesWithMatchers ( ctx , h , matchers ... )
2021-08-03 05:14:26 -07:00
}
// Postings returns the postings list iterator for the label pairs.
2023-09-13 08:45:06 -07:00
func ( h * headIndexReader ) Postings ( ctx context . Context , name string , values ... string ) ( index . Postings , error ) {
Label values with matchers by intersecting postings (#9907)
* LabelValues w/matchers by intersecting postings
Instead of iterating all matched series to find the values, this
checks if each one of the label values is present in the matched series
(postings).
Pending to be benchmarked.
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Benchmark labelValuesWithMatchers
name old time/op new time/op
Querier/Head/labelValuesWithMatchers/i_with_n="1" 157ms ± 0% 48ms ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 1.80s ± 0% 0.46s ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 144ms ± 0% 57ms ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 304ms ± 0% 111ms ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 761ms ± 0% 164ms ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 6.11µs ± 0% 6.62µs ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 117ms ± 0% 62ms ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 1.44s ± 0% 0.24s ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 92.1ms ± 0% 70.3ms ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 196ms ± 0% 115ms ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 1.23s ± 0% 0.21s ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 1.06ms ± 0% 0.88ms ± 0%
name old alloc/op new alloc/op
Querier/Head/labelValuesWithMatchers/i_with_n="1" 29.5MB ± 0% 26.9MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 46.8MB ± 0% 251.5MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 29.5MB ± 0% 22.3MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 46.8MB ± 0% 23.9MB ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 10.3kB ± 0% 138535.2kB ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 5.54kB ± 0% 7.09kB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 39.1MB ± 0% 28.5MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 287MB ± 0% 253MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 34.3MB ± 0% 23.9MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 51.6MB ± 0% 25.5MB ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 144MB ± 0% 139MB ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 6.43kB ± 0% 8.66kB ± 0%
name old allocs/op new allocs/op
Querier/Head/labelValuesWithMatchers/i_with_n="1" 104k ± 0% 500k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 204k ± 0% 600k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 104k ± 0% 500k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 204k ± 0% 500k ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 66.0 ± 0% 255.0 ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 61.0 ± 0% 205.0 ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 304k ± 0% 600k ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 5.20M ± 0% 0.70M ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 204k ± 0% 600k ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 304k ± 0% 600k ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 3.00M ± 0% 0.00M ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 61.0 ± 0% 247.0 ± 0%
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Don't expand postings to intersect them
Using a min heap we can check whether matched postings intersect with
each one of the label values postings. This avoid expanding postings
(and thus having all of them in memory at any point).
Slightly slower than the expanding postings version for some cases, but
definitely pays the price once the cardinality grows.
Still offers 10x latency improvement where previous latencies were
reaching 1s.
Benchmark results:
name \ time/op old.txt intersect.txt intersect_noexpand.txt
Querier/Head/labelValuesWithMatchers/i_with_n="1" 157ms ± 0% 48ms ± 0% 110ms ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 1.80s ± 0% 0.46s ± 0% 0.18s ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 144ms ± 0% 57ms ± 0% 125ms ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 304ms ± 0% 111ms ± 0% 177ms ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 761ms ± 0% 164ms ± 0% 134ms ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 6.11µs ± 0% 6.62µs ± 0% 4.29µs ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 117ms ± 0% 62ms ± 0% 120ms ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 1.44s ± 0% 0.24s ± 0% 0.15s ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 92.1ms ± 0% 70.3ms ± 0% 125.4ms ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 196ms ± 0% 115ms ± 0% 170ms ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 1.23s ± 0% 0.21s ± 0% 0.14s ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 1.06ms ± 0% 0.88ms ± 0% 0.92ms ± 0%
name \ alloc/op old.txt intersect.txt intersect_noexpand.txt
Querier/Head/labelValuesWithMatchers/i_with_n="1" 29.5MB ± 0% 26.9MB ± 0% 19.1MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 46.8MB ± 0% 251.5MB ± 0% 36.3MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 29.5MB ± 0% 22.3MB ± 0% 19.1MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 46.8MB ± 0% 23.9MB ± 0% 20.7MB ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 10.3kB ± 0% 138535.2kB ± 0% 6.4kB ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 5.54kB ± 0% 7.09kB ± 0% 4.30kB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 39.1MB ± 0% 28.5MB ± 0% 20.7MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 287MB ± 0% 253MB ± 0% 38MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 34.3MB ± 0% 23.9MB ± 0% 20.7MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 51.6MB ± 0% 25.5MB ± 0% 22.3MB ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 144MB ± 0% 139MB ± 0% 0MB ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 6.43kB ± 0% 8.66kB ± 0% 5.86kB ± 0%
name \ allocs/op old.txt intersect.txt intersect_noexpand.txt
Querier/Head/labelValuesWithMatchers/i_with_n="1" 104k ± 0% 500k ± 0% 300k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 204k ± 0% 600k ± 0% 400k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 104k ± 0% 500k ± 0% 300k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 204k ± 0% 500k ± 0% 300k ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 66.0 ± 0% 255.0 ± 0% 139.0 ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 61.0 ± 0% 205.0 ± 0% 87.0 ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 304k ± 0% 600k ± 0% 400k ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 5.20M ± 0% 0.70M ± 0% 0.50M ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 204k ± 0% 600k ± 0% 400k ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 304k ± 0% 600k ± 0% 400k ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 3.00M ± 0% 0.00M ± 0% 0.00M ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 61.0 ± 0% 247.0 ± 0% 129.0 ± 0%
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Apply comment suggestions from the code review
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com>
* Change else { if } to else if
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Remove sorting of label values
We were not sorting them before, so no need to sort them now
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com>
2021-12-28 06:59:03 -08:00
switch len ( values ) {
case 0 :
return index . EmptyPostings ( ) , nil
case 1 :
return h . head . postings . Get ( name , values [ 0 ] ) , nil
default :
res := make ( [ ] index . Postings , 0 , len ( values ) )
for _ , value := range values {
2023-01-10 01:51:49 -08:00
if p := h . head . postings . Get ( name , value ) ; ! index . IsEmptyPostingsType ( p ) {
res = append ( res , p )
}
Label values with matchers by intersecting postings (#9907)
* LabelValues w/matchers by intersecting postings
Instead of iterating all matched series to find the values, this
checks if each one of the label values is present in the matched series
(postings).
Pending to be benchmarked.
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Benchmark labelValuesWithMatchers
name old time/op new time/op
Querier/Head/labelValuesWithMatchers/i_with_n="1" 157ms ± 0% 48ms ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 1.80s ± 0% 0.46s ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 144ms ± 0% 57ms ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 304ms ± 0% 111ms ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 761ms ± 0% 164ms ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 6.11µs ± 0% 6.62µs ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 117ms ± 0% 62ms ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 1.44s ± 0% 0.24s ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 92.1ms ± 0% 70.3ms ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 196ms ± 0% 115ms ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 1.23s ± 0% 0.21s ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 1.06ms ± 0% 0.88ms ± 0%
name old alloc/op new alloc/op
Querier/Head/labelValuesWithMatchers/i_with_n="1" 29.5MB ± 0% 26.9MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 46.8MB ± 0% 251.5MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 29.5MB ± 0% 22.3MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 46.8MB ± 0% 23.9MB ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 10.3kB ± 0% 138535.2kB ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 5.54kB ± 0% 7.09kB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 39.1MB ± 0% 28.5MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 287MB ± 0% 253MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 34.3MB ± 0% 23.9MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 51.6MB ± 0% 25.5MB ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 144MB ± 0% 139MB ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 6.43kB ± 0% 8.66kB ± 0%
name old allocs/op new allocs/op
Querier/Head/labelValuesWithMatchers/i_with_n="1" 104k ± 0% 500k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 204k ± 0% 600k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 104k ± 0% 500k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 204k ± 0% 500k ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 66.0 ± 0% 255.0 ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 61.0 ± 0% 205.0 ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 304k ± 0% 600k ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 5.20M ± 0% 0.70M ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 204k ± 0% 600k ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 304k ± 0% 600k ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 3.00M ± 0% 0.00M ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 61.0 ± 0% 247.0 ± 0%
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Don't expand postings to intersect them
Using a min heap we can check whether matched postings intersect with
each one of the label values postings. This avoid expanding postings
(and thus having all of them in memory at any point).
Slightly slower than the expanding postings version for some cases, but
definitely pays the price once the cardinality grows.
Still offers 10x latency improvement where previous latencies were
reaching 1s.
Benchmark results:
name \ time/op old.txt intersect.txt intersect_noexpand.txt
Querier/Head/labelValuesWithMatchers/i_with_n="1" 157ms ± 0% 48ms ± 0% 110ms ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 1.80s ± 0% 0.46s ± 0% 0.18s ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 144ms ± 0% 57ms ± 0% 125ms ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 304ms ± 0% 111ms ± 0% 177ms ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 761ms ± 0% 164ms ± 0% 134ms ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 6.11µs ± 0% 6.62µs ± 0% 4.29µs ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 117ms ± 0% 62ms ± 0% 120ms ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 1.44s ± 0% 0.24s ± 0% 0.15s ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 92.1ms ± 0% 70.3ms ± 0% 125.4ms ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 196ms ± 0% 115ms ± 0% 170ms ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 1.23s ± 0% 0.21s ± 0% 0.14s ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 1.06ms ± 0% 0.88ms ± 0% 0.92ms ± 0%
name \ alloc/op old.txt intersect.txt intersect_noexpand.txt
Querier/Head/labelValuesWithMatchers/i_with_n="1" 29.5MB ± 0% 26.9MB ± 0% 19.1MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 46.8MB ± 0% 251.5MB ± 0% 36.3MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 29.5MB ± 0% 22.3MB ± 0% 19.1MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 46.8MB ± 0% 23.9MB ± 0% 20.7MB ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 10.3kB ± 0% 138535.2kB ± 0% 6.4kB ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 5.54kB ± 0% 7.09kB ± 0% 4.30kB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 39.1MB ± 0% 28.5MB ± 0% 20.7MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 287MB ± 0% 253MB ± 0% 38MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 34.3MB ± 0% 23.9MB ± 0% 20.7MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 51.6MB ± 0% 25.5MB ± 0% 22.3MB ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 144MB ± 0% 139MB ± 0% 0MB ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 6.43kB ± 0% 8.66kB ± 0% 5.86kB ± 0%
name \ allocs/op old.txt intersect.txt intersect_noexpand.txt
Querier/Head/labelValuesWithMatchers/i_with_n="1" 104k ± 0% 500k ± 0% 300k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 204k ± 0% 600k ± 0% 400k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 104k ± 0% 500k ± 0% 300k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 204k ± 0% 500k ± 0% 300k ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 66.0 ± 0% 255.0 ± 0% 139.0 ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 61.0 ± 0% 205.0 ± 0% 87.0 ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 304k ± 0% 600k ± 0% 400k ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 5.20M ± 0% 0.70M ± 0% 0.50M ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 204k ± 0% 600k ± 0% 400k ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 304k ± 0% 600k ± 0% 400k ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 3.00M ± 0% 0.00M ± 0% 0.00M ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 61.0 ± 0% 247.0 ± 0% 129.0 ± 0%
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Apply comment suggestions from the code review
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com>
* Change else { if } to else if
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Remove sorting of label values
We were not sorting them before, so no need to sort them now
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com>
2021-12-28 06:59:03 -08:00
}
2023-09-13 08:45:06 -07:00
return index . Merge ( ctx , res ... ) , nil
2021-08-03 05:14:26 -07:00
}
}
2024-05-09 02:55:30 -07:00
func ( h * headIndexReader ) PostingsForLabelMatching ( ctx context . Context , name string , match func ( string ) bool ) index . Postings {
return h . head . postings . PostingsForLabelMatching ( ctx , name , match )
}
2021-08-03 05:14:26 -07:00
func ( h * headIndexReader ) SortedPostings ( p index . Postings ) index . Postings {
series := make ( [ ] * memSeries , 0 , 128 )
// Fetch all the series only once.
for p . Next ( ) {
2021-11-06 03:10:04 -07:00
s := h . head . series . getByID ( chunks . HeadSeriesRef ( p . At ( ) ) )
2021-08-03 05:14:26 -07:00
if s == nil {
level . Debug ( h . head . logger ) . Log ( "msg" , "Looked up series not found" )
} else {
series = append ( series , s )
}
}
if err := p . Err ( ) ; err != nil {
2023-11-16 10:54:41 -08:00
return index . ErrPostings ( fmt . Errorf ( "expand postings: %w" , err ) )
2021-08-03 05:14:26 -07:00
}
2023-09-21 13:53:51 -07:00
slices . SortFunc ( series , func ( a , b * memSeries ) int {
return labels . Compare ( a . lset , b . lset )
2021-08-03 05:14:26 -07:00
} )
// Convert back to list.
2021-11-06 03:10:04 -07:00
ep := make ( [ ] storage . SeriesRef , 0 , len ( series ) )
2021-08-03 05:14:26 -07:00
for _ , p := range series {
2021-11-06 03:10:04 -07:00
ep = append ( ep , storage . SeriesRef ( p . ref ) )
2021-08-03 05:14:26 -07:00
}
return index . NewListPostings ( ep )
}
2024-01-29 03:57:27 -08:00
// ShardedPostings implements IndexReader. This function returns an failing postings list if sharding
// has not been enabled in the Head.
func ( h * headIndexReader ) ShardedPostings ( p index . Postings , shardIndex , shardCount uint64 ) index . Postings {
if ! h . head . opts . EnableSharding {
return index . ErrPostings ( errors . New ( "sharding is disabled" ) )
}
out := make ( [ ] storage . SeriesRef , 0 , 128 )
for p . Next ( ) {
s := h . head . series . getByID ( chunks . HeadSeriesRef ( p . At ( ) ) )
if s == nil {
level . Debug ( h . head . logger ) . Log ( "msg" , "Looked up series not found" )
continue
}
// Check if the series belong to the shard.
if s . shardHash % shardCount != shardIndex {
continue
}
out = append ( out , storage . SeriesRef ( s . ref ) )
}
return index . NewListPostings ( out )
}
2021-08-03 05:14:26 -07:00
// Series returns the series for the given reference.
2024-01-29 03:57:27 -08:00
// Chunks are skipped if chks is nil.
2022-12-15 10:19:15 -08:00
func ( h * headIndexReader ) Series ( ref storage . SeriesRef , builder * labels . ScratchBuilder , chks * [ ] chunks . Meta ) error {
2021-11-06 03:10:04 -07:00
s := h . head . series . getByID ( chunks . HeadSeriesRef ( ref ) )
2021-08-03 05:14:26 -07:00
if s == nil {
h . head . metrics . seriesNotFound . Inc ( )
return storage . ErrNotFound
}
2022-12-15 10:19:15 -08:00
builder . Assign ( s . lset )
2021-08-03 05:14:26 -07:00
2024-01-29 03:57:27 -08:00
if chks == nil {
return nil
}
2021-08-03 05:14:26 -07:00
s . Lock ( )
defer s . Unlock ( )
* chks = ( * chks ) [ : 0 ]
for i , c := range s . mmappedChunks {
// Do not expose chunks that are outside of the specified range.
if ! c . OverlapsClosedInterval ( h . mint , h . maxt ) {
continue
}
* chks = append ( * chks , chunks . Meta {
MinTime : c . minTime ,
MaxTime : c . maxTime ,
2021-11-17 05:05:10 -08:00
Ref : chunks . ChunkRef ( chunks . NewHeadChunkRef ( s . ref , s . headChunkID ( i ) ) ) ,
2021-08-03 05:14:26 -07:00
} )
}
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 02:10:24 -07:00
if s . headChunks != nil {
var maxTime int64
var i , j int
for i = s . headChunks . len ( ) - 1 ; i >= 0 ; i -- {
chk := s . headChunks . atOffset ( i )
if i == 0 {
// Set the head chunk as open (being appended to) for the first headChunk.
maxTime = math . MaxInt64
} else {
maxTime = chk . maxTime
}
if chk . OverlapsClosedInterval ( h . mint , h . maxt ) {
* chks = append ( * chks , chunks . Meta {
MinTime : chk . minTime ,
MaxTime : maxTime ,
Ref : chunks . ChunkRef ( chunks . NewHeadChunkRef ( s . ref , s . headChunkID ( len ( s . mmappedChunks ) + j ) ) ) ,
} )
}
j ++
}
2021-08-03 05:14:26 -07:00
}
return nil
}
2022-09-20 10:05:50 -07:00
// headChunkID returns the HeadChunkID referred to by the given position.
// * 0 <= pos < len(s.mmappedChunks) refer to s.mmappedChunks[pos]
2023-10-03 13:09:25 -07:00
// * pos >= len(s.mmappedChunks) refers to s.headChunks linked list.
2021-11-17 05:05:10 -08:00
func ( s * memSeries ) headChunkID ( pos int ) chunks . HeadChunkID {
return chunks . HeadChunkID ( pos ) + s . firstChunkID
2021-08-03 05:14:26 -07:00
}
2022-09-20 10:05:50 -07:00
// oooHeadChunkID returns the HeadChunkID referred to by the given position.
// * 0 <= pos < len(s.oooMmappedChunks) refer to s.oooMmappedChunks[pos]
// * pos == len(s.oooMmappedChunks) refers to s.oooHeadChunk
2022-12-28 02:19:41 -08:00
// The caller must ensure that s.ooo is not nil.
2022-09-20 10:05:50 -07:00
func ( s * memSeries ) oooHeadChunkID ( pos int ) chunks . HeadChunkID {
2022-12-28 02:19:41 -08:00
return chunks . HeadChunkID ( pos ) + s . ooo . firstOOOChunkID
2022-09-20 10:05:50 -07:00
}
2021-08-03 05:14:26 -07:00
// LabelValueFor returns label value for the given label name in the series referred to by ID.
2023-09-14 07:02:04 -07:00
func ( h * headIndexReader ) LabelValueFor ( _ context . Context , id storage . SeriesRef , label string ) ( string , error ) {
2021-11-06 03:10:04 -07:00
memSeries := h . head . series . getByID ( chunks . HeadSeriesRef ( id ) )
2021-08-03 05:14:26 -07:00
if memSeries == nil {
return "" , storage . ErrNotFound
}
value := memSeries . lset . Get ( label )
if value == "" {
return "" , storage . ErrNotFound
}
return value , nil
}
// LabelNamesFor returns all the label names for the series referred to by IDs.
// The names returned are sorted.
2023-09-14 01:39:51 -07:00
func ( h * headIndexReader ) LabelNamesFor ( ctx context . Context , ids ... storage . SeriesRef ) ( [ ] string , error ) {
2021-08-03 05:14:26 -07:00
namesMap := make ( map [ string ] struct { } )
for _ , id := range ids {
2023-09-14 01:39:51 -07:00
if ctx . Err ( ) != nil {
return nil , ctx . Err ( )
}
2021-11-06 03:10:04 -07:00
memSeries := h . head . series . getByID ( chunks . HeadSeriesRef ( id ) )
2021-08-03 05:14:26 -07:00
if memSeries == nil {
return nil , storage . ErrNotFound
}
2022-03-09 14:17:40 -08:00
memSeries . lset . Range ( func ( lbl labels . Label ) {
2021-08-03 05:14:26 -07:00
namesMap [ lbl . Name ] = struct { } { }
2022-03-09 14:17:40 -08:00
} )
2021-08-03 05:14:26 -07:00
}
names := make ( [ ] string , 0 , len ( namesMap ) )
for name := range namesMap {
names = append ( names , name )
}
2022-09-30 07:33:56 -07:00
slices . Sort ( names )
2021-08-03 05:14:26 -07:00
return names , nil
}
// Chunks returns a ChunkReader against the block.
func ( h * Head ) Chunks ( ) ( ChunkReader , error ) {
return h . chunksRange ( math . MinInt64 , math . MaxInt64 , h . iso . State ( math . MinInt64 , math . MaxInt64 ) )
}
func ( h * Head ) chunksRange ( mint , maxt int64 , is * isolationState ) ( * headChunkReader , error ) {
h . closedMtx . Lock ( )
defer h . closedMtx . Unlock ( )
if h . closed {
return nil , errors . New ( "can't read from a closed head" )
}
if hmin := h . MinTime ( ) ; hmin > mint {
mint = hmin
}
return & headChunkReader {
head : h ,
mint : mint ,
maxt : maxt ,
isoState : is ,
} , nil
}
type headChunkReader struct {
head * Head
mint , maxt int64
isoState * isolationState
}
func ( h * headChunkReader ) Close ( ) error {
2022-09-27 07:01:23 -07:00
if h . isoState != nil {
h . isoState . Close ( )
}
2021-08-03 05:14:26 -07:00
return nil
}
2023-11-28 02:14:29 -08:00
// ChunkOrIterable returns the chunk for the reference number.
func ( h * headChunkReader ) ChunkOrIterable ( meta chunks . Meta ) ( chunkenc . Chunk , chunkenc . Iterable , error ) {
2023-02-19 09:34:51 -08:00
chk , _ , err := h . chunk ( meta , false )
2023-11-28 02:14:29 -08:00
return chk , nil , err
2023-02-19 09:34:51 -08:00
}
// ChunkWithCopy returns the chunk for the reference number.
// If the chunk is the in-memory chunk, then it makes a copy and returns the copied chunk.
func ( h * headChunkReader ) ChunkWithCopy ( meta chunks . Meta ) ( chunkenc . Chunk , int64 , error ) {
return h . chunk ( meta , true )
}
// chunk returns the chunk for the reference number.
// If copyLastChunk is true, then it makes a copy of the head chunk if asked for it.
// Also returns max time of the chunk.
func ( h * headChunkReader ) chunk ( meta chunks . Meta , copyLastChunk bool ) ( chunkenc . Chunk , int64 , error ) {
2022-09-20 10:05:50 -07:00
sid , cid := chunks . HeadChunkRef ( meta . Ref ) . Unpack ( )
2021-08-03 05:14:26 -07:00
s := h . head . series . getByID ( sid )
// This means that the series has been garbage collected.
if s == nil {
2023-02-19 09:34:51 -08:00
return nil , 0 , storage . ErrNotFound
2021-08-03 05:14:26 -07:00
}
s . Lock ( )
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 02:10:24 -07:00
c , headChunk , isOpen , err := s . chunk ( cid , h . head . chunkDiskMapper , & h . head . memChunkPool )
2021-08-03 05:14:26 -07:00
if err != nil {
s . Unlock ( )
2023-02-19 09:34:51 -08:00
return nil , 0 , err
2021-08-03 05:14:26 -07:00
}
defer func ( ) {
2023-02-19 09:34:51 -08:00
if ! headChunk {
2021-08-03 05:14:26 -07:00
// Set this to nil so that Go GC can collect it after it has been used.
c . chunk = nil
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 02:10:24 -07:00
c . prev = nil
2022-09-15 00:52:09 -07:00
h . head . memChunkPool . Put ( c )
2021-08-03 05:14:26 -07:00
}
} ( )
// This means that the chunk is outside the specified range.
if ! c . OverlapsClosedInterval ( h . mint , h . maxt ) {
s . Unlock ( )
2023-02-19 09:34:51 -08:00
return nil , 0 , storage . ErrNotFound
}
chk , maxTime := c . chunk , c . maxTime
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 02:10:24 -07:00
if headChunk && isOpen && copyLastChunk {
2023-02-19 09:34:51 -08:00
// The caller may ask to copy the head chunk in order to take the
// bytes of the chunk without causing the race between read and append.
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 02:10:24 -07:00
b := s . headChunks . chunk . Bytes ( )
2023-02-19 09:34:51 -08:00
newB := make ( [ ] byte , len ( b ) )
copy ( newB , b ) // TODO(codesome): Use bytes.Clone() when we upgrade to Go 1.20.
// TODO(codesome): Put back in the pool (non-trivial).
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 02:10:24 -07:00
chk , err = h . head . opts . ChunkPool . Get ( s . headChunks . chunk . Encoding ( ) , newB )
2023-02-19 09:34:51 -08:00
if err != nil {
return nil , 0 , err
}
2021-08-03 05:14:26 -07:00
}
s . Unlock ( )
2023-05-19 01:24:06 -07:00
return & safeHeadChunk {
2023-02-19 09:34:51 -08:00
Chunk : chk ,
2023-02-21 01:30:11 -08:00
s : s ,
cid : cid ,
isoState : h . isoState ,
2023-02-19 09:34:51 -08:00
} , maxTime , nil
2021-08-03 05:14:26 -07:00
}
2021-11-17 05:05:10 -08:00
// chunk returns the chunk for the HeadChunkID from memory or by m-mapping it from the disk.
2023-03-21 02:45:36 -07:00
// If headChunk is false, it means that the returned *memChunk
2021-11-17 02:21:27 -08:00
// (and not the chunkenc.Chunk inside it) can be garbage collected after its usage.
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 02:10:24 -07:00
// if isOpen is true, it means that the returned *memChunk is used for appends.
func ( s * memSeries ) chunk ( id chunks . HeadChunkID , chunkDiskMapper * chunks . ChunkDiskMapper , memChunkPool * sync . Pool ) ( chunk * memChunk , headChunk , isOpen bool , err error ) {
2021-08-03 05:14:26 -07:00
// ix represents the index of chunk in the s.mmappedChunks slice. The chunk id's are
// incremented by 1 when new chunk is created, hence (id - firstChunkID) gives the slice index.
// The max index for the s.mmappedChunks slice can be len(s.mmappedChunks)-1, hence if the ix
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 02:10:24 -07:00
// is >= len(s.mmappedChunks), it represents one of the chunks on s.headChunks linked list.
// The order of elemens is different for slice and linked list.
// For s.mmappedChunks slice newer chunks are appended to it.
// For s.headChunks list newer chunks are prepended to it.
//
// memSeries {
// mmappedChunks: [t0, t1, t2]
// headChunk: {t5}->{t4}->{t3}
// }
2021-11-17 05:05:10 -08:00
ix := int ( id ) - int ( s . firstChunkID )
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 02:10:24 -07:00
var headChunksLen int
if s . headChunks != nil {
headChunksLen = s . headChunks . len ( )
2021-08-03 05:14:26 -07:00
}
2023-02-19 09:34:51 -08:00
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 02:10:24 -07:00
if ix < 0 || ix > len ( s . mmappedChunks ) + headChunksLen - 1 {
return nil , false , false , storage . ErrNotFound
2021-08-03 05:14:26 -07:00
}
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 02:10:24 -07:00
if ix < len ( s . mmappedChunks ) {
chk , err := chunkDiskMapper . Chunk ( s . mmappedChunks [ ix ] . ref )
if err != nil {
2023-11-16 10:54:41 -08:00
var cerr * chunks . CorruptionErr
if errors . As ( err , & cerr ) {
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 02:10:24 -07:00
panic ( err )
}
return nil , false , false , err
2021-08-03 05:14:26 -07:00
}
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 02:10:24 -07:00
mc := memChunkPool . Get ( ) . ( * memChunk )
mc . chunk = chk
mc . minTime = s . mmappedChunks [ ix ] . minTime
mc . maxTime = s . mmappedChunks [ ix ] . maxTime
return mc , false , false , nil
2021-08-03 05:14:26 -07:00
}
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 02:10:24 -07:00
ix -= len ( s . mmappedChunks )
offset := headChunksLen - ix - 1
// headChunks is a linked list where first element is the most recent one and the last one is the oldest.
// This order is reversed when compared with mmappedChunks, since mmappedChunks[0] is the oldest chunk,
// while headChunk.atOffset(0) would give us the most recent chunk.
// So when calling headChunk.atOffset() we need to reverse the value of ix.
elem := s . headChunks . atOffset ( offset )
if elem == nil {
// This should never really happen and would mean that headChunksLen value is NOT equal
// to the length of the headChunks list.
return nil , false , false , storage . ErrNotFound
}
return elem , true , offset == 0 , nil
2021-08-03 05:14:26 -07:00
}
2023-11-28 02:14:29 -08:00
// oooMergedChunks return an iterable over one or more OOO chunks for the given
// chunks.Meta reference from memory or by m-mapping it from the disk. The
// returned iterable will be a merge of all the overlapping chunks, if any,
// amongst all the chunks in the OOOHead.
2022-09-20 10:05:50 -07:00
// This function is not thread safe unless the caller holds a lock.
2022-12-28 02:19:41 -08:00
// The caller must ensure that s.ooo is not nil.
2023-11-28 02:14:29 -08:00
func ( s * memSeries ) oooMergedChunks ( meta chunks . Meta , cdm * chunks . ChunkDiskMapper , mint , maxt int64 ) ( * mergedOOOChunks , error ) {
2022-09-20 10:05:50 -07:00
_ , cid := chunks . HeadChunkRef ( meta . Ref ) . Unpack ( )
// ix represents the index of chunk in the s.mmappedChunks slice. The chunk meta's are
// incremented by 1 when new chunk is created, hence (meta - firstChunkID) gives the slice index.
// The max index for the s.mmappedChunks slice can be len(s.mmappedChunks)-1, hence if the ix
// is len(s.mmappedChunks), it represents the next chunk, which is the head chunk.
2022-12-28 02:19:41 -08:00
ix := int ( cid ) - int ( s . ooo . firstOOOChunkID )
if ix < 0 || ix > len ( s . ooo . oooMmappedChunks ) {
2022-09-20 10:05:50 -07:00
return nil , storage . ErrNotFound
}
2022-12-28 02:19:41 -08:00
if ix == len ( s . ooo . oooMmappedChunks ) {
if s . ooo . oooHeadChunk == nil {
2022-09-20 10:05:50 -07:00
return nil , errors . New ( "invalid ooo head chunk" )
}
}
// We create a temporary slice of chunk metas to hold the information of all
// possible chunks that may overlap with the requested chunk.
2022-12-28 02:19:41 -08:00
tmpChks := make ( [ ] chunkMetaAndChunkDiskMapperRef , 0 , len ( s . ooo . oooMmappedChunks ) )
2022-09-20 10:05:50 -07:00
2022-12-28 02:19:41 -08:00
oooHeadRef := chunks . ChunkRef ( chunks . NewHeadChunkRef ( s . ref , s . oooHeadChunkID ( len ( s . ooo . oooMmappedChunks ) ) ) )
if s . ooo . oooHeadChunk != nil && s . ooo . oooHeadChunk . OverlapsClosedInterval ( mint , maxt ) {
2022-09-20 10:05:50 -07:00
// We only want to append the head chunk if this chunk existed when
// Series() was called. This brings consistency in case new data
// is added in between Series() and Chunk() calls.
if oooHeadRef == meta . OOOLastRef {
tmpChks = append ( tmpChks , chunkMetaAndChunkDiskMapperRef {
meta : chunks . Meta {
// Ignoring samples added before and after the last known min and max time for this chunk.
MinTime : meta . OOOLastMinTime ,
MaxTime : meta . OOOLastMaxTime ,
Ref : oooHeadRef ,
} ,
} )
}
}
2022-12-28 02:19:41 -08:00
for i , c := range s . ooo . oooMmappedChunks {
2022-09-20 10:05:50 -07:00
chunkRef := chunks . ChunkRef ( chunks . NewHeadChunkRef ( s . ref , s . oooHeadChunkID ( i ) ) )
// We can skip chunks that came in later than the last known OOOLastRef.
if chunkRef > meta . OOOLastRef {
break
}
style: Replace `else if` cascades with `switch`
Wiser coders than myself have come to the conclusion that a `switch`
statement is almost always superior to a statement that includes any
`else if`.
The exceptions that I have found in our codebase are just these two:
* The `if else` is followed by an additional statement before the next
condition (separated by a `;`).
* The whole thing is within a `for` loop and `break` statements are
used. In this case, using `switch` would require tagging the `for`
loop, which probably tips the balance.
Why are `switch` statements more readable?
For one, fewer curly braces. But more importantly, the conditions all
have the same alignment, so the whole thing follows the natural flow
of going down a list of conditions. With `else if`, in contrast, all
conditions but the first are "hidden" behind `} else if `, harder to
spot and (for no good reason) presented differently from the first
condition.
I'm sure the aforemention wise coders can list even more reasons.
In any case, I like it so much that I have found myself recommending
it in code reviews. I would like to make it a habit in our code base,
without making it a hard requirement that we would test on the CI. But
for that, there has to be a role model, so this commit eliminates all
`if else` occurrences, unless it is autogenerated code or fits one of
the exceptions above.
Signed-off-by: beorn7 <beorn@grafana.com>
2023-04-12 07:14:31 -07:00
switch {
case chunkRef == meta . OOOLastRef :
2022-09-20 10:05:50 -07:00
tmpChks = append ( tmpChks , chunkMetaAndChunkDiskMapperRef {
meta : chunks . Meta {
MinTime : meta . OOOLastMinTime ,
MaxTime : meta . OOOLastMaxTime ,
Ref : chunkRef ,
} ,
ref : c . ref ,
origMinT : c . minTime ,
origMaxT : c . maxTime ,
} )
style: Replace `else if` cascades with `switch`
Wiser coders than myself have come to the conclusion that a `switch`
statement is almost always superior to a statement that includes any
`else if`.
The exceptions that I have found in our codebase are just these two:
* The `if else` is followed by an additional statement before the next
condition (separated by a `;`).
* The whole thing is within a `for` loop and `break` statements are
used. In this case, using `switch` would require tagging the `for`
loop, which probably tips the balance.
Why are `switch` statements more readable?
For one, fewer curly braces. But more importantly, the conditions all
have the same alignment, so the whole thing follows the natural flow
of going down a list of conditions. With `else if`, in contrast, all
conditions but the first are "hidden" behind `} else if `, harder to
spot and (for no good reason) presented differently from the first
condition.
I'm sure the aforemention wise coders can list even more reasons.
In any case, I like it so much that I have found myself recommending
it in code reviews. I would like to make it a habit in our code base,
without making it a hard requirement that we would test on the CI. But
for that, there has to be a role model, so this commit eliminates all
`if else` occurrences, unless it is autogenerated code or fits one of
the exceptions above.
Signed-off-by: beorn7 <beorn@grafana.com>
2023-04-12 07:14:31 -07:00
case c . OverlapsClosedInterval ( mint , maxt ) :
2022-09-20 10:05:50 -07:00
tmpChks = append ( tmpChks , chunkMetaAndChunkDiskMapperRef {
meta : chunks . Meta {
MinTime : c . minTime ,
MaxTime : c . maxTime ,
Ref : chunkRef ,
} ,
ref : c . ref ,
} )
}
}
// Next we want to sort all the collected chunks by min time so we can find
// those that overlap and stop when we know the rest don't.
2023-07-08 05:45:56 -07:00
slices . SortFunc ( tmpChks , refLessByMinTimeAndMinRef )
2022-09-20 10:05:50 -07:00
mc := & mergedOOOChunks { }
absoluteMax := int64 ( math . MinInt64 )
for _ , c := range tmpChks {
2023-11-28 02:14:29 -08:00
if c . meta . Ref != meta . Ref && ( len ( mc . chunkIterables ) == 0 || c . meta . MinTime > absoluteMax ) {
2022-09-20 10:05:50 -07:00
continue
}
2023-11-28 02:14:29 -08:00
var iterable chunkenc . Iterable
2022-09-20 10:05:50 -07:00
if c . meta . Ref == oooHeadRef {
var xor * chunkenc . XORChunk
2023-11-28 02:14:29 -08:00
var err error
2022-09-20 10:05:50 -07:00
// If head chunk min and max time match the meta OOO markers
// that means that the chunk has not expanded so we can append
// it as it is.
2022-12-28 02:19:41 -08:00
if s . ooo . oooHeadChunk . minTime == meta . OOOLastMinTime && s . ooo . oooHeadChunk . maxTime == meta . OOOLastMaxTime {
xor , err = s . ooo . oooHeadChunk . chunk . ToXOR ( ) // TODO(jesus.vazquez) (This is an optimization idea that has no priority and might not be that useful) See if we could use a copy of the underlying slice. That would leave the more expensive ToXOR() function only for the usecase where Bytes() is called.
2022-09-20 10:05:50 -07:00
} else {
// We need to remove samples that are outside of the markers
2022-12-28 02:19:41 -08:00
xor , err = s . ooo . oooHeadChunk . chunk . ToXORBetweenTimestamps ( meta . OOOLastMinTime , meta . OOOLastMaxTime )
2022-09-20 10:05:50 -07:00
}
if err != nil {
2023-11-16 10:54:41 -08:00
return nil , fmt . Errorf ( "failed to convert ooo head chunk to xor chunk: %w" , err )
2022-09-20 10:05:50 -07:00
}
2023-11-28 02:14:29 -08:00
iterable = xor
2022-09-20 10:05:50 -07:00
} else {
chk , err := cdm . Chunk ( c . ref )
if err != nil {
2023-11-16 10:54:41 -08:00
var cerr * chunks . CorruptionErr
if errors . As ( err , & cerr ) {
return nil , fmt . Errorf ( "invalid ooo mmapped chunk: %w" , err )
2022-09-20 10:05:50 -07:00
}
return nil , err
}
if c . meta . Ref == meta . OOOLastRef &&
( c . origMinT != meta . OOOLastMinTime || c . origMaxT != meta . OOOLastMaxTime ) {
// The head expanded and was memory mapped so now we need to
// wrap the chunk within a chunk that doesnt allows us to iterate
// through samples out of the OOOLastMinT and OOOLastMaxT
// markers.
2023-11-28 02:14:29 -08:00
iterable = boundedIterable { chk , meta . OOOLastMinTime , meta . OOOLastMaxTime }
2022-09-20 10:05:50 -07:00
} else {
2023-11-28 02:14:29 -08:00
iterable = chk
2022-09-20 10:05:50 -07:00
}
}
2023-11-28 02:14:29 -08:00
mc . chunkIterables = append ( mc . chunkIterables , iterable )
2022-09-20 10:05:50 -07:00
if c . meta . MaxTime > absoluteMax {
absoluteMax = c . meta . MaxTime
}
}
return mc , nil
}
2023-11-28 02:14:29 -08:00
var _ chunkenc . Iterable = & boundedIterable { }
2022-09-20 10:05:50 -07:00
2023-11-28 02:14:29 -08:00
// boundedIterable is an implementation of chunkenc.Iterable that uses a
2022-09-20 10:05:50 -07:00
// boundedIterator that only iterates through samples which timestamps are
2023-10-03 13:09:25 -07:00
// >= minT and <= maxT.
2023-11-28 02:14:29 -08:00
type boundedIterable struct {
chunk chunkenc . Chunk
minT int64
maxT int64
2022-09-20 10:05:50 -07:00
}
2023-11-28 02:14:29 -08:00
func ( b boundedIterable ) Iterator ( iterator chunkenc . Iterator ) chunkenc . Iterator {
it := b . chunk . Iterator ( iterator )
2022-09-20 10:05:50 -07:00
if it == nil {
panic ( "iterator shouldn't be nil" )
}
return boundedIterator { it , b . minT , b . maxT }
}
var _ chunkenc . Iterator = & boundedIterator { }
// boundedIterator is an implementation of Iterator that only iterates through
2023-10-03 13:09:25 -07:00
// samples which timestamps are >= minT and <= maxT.
2022-09-20 10:05:50 -07:00
type boundedIterator struct {
chunkenc . Iterator
minT int64
maxT int64
}
// Next the first time its called it will advance as many positions as necessary
// until its able to find a sample within the bounds minT and maxT.
// If there are samples within bounds it will advance one by one amongst them.
// If there are no samples within bounds it will return false.
2022-10-11 09:35:35 -07:00
func ( b boundedIterator ) Next ( ) chunkenc . ValueType {
for b . Iterator . Next ( ) == chunkenc . ValFloat {
2022-09-20 10:05:50 -07:00
t , _ := b . Iterator . At ( )
style: Replace `else if` cascades with `switch`
Wiser coders than myself have come to the conclusion that a `switch`
statement is almost always superior to a statement that includes any
`else if`.
The exceptions that I have found in our codebase are just these two:
* The `if else` is followed by an additional statement before the next
condition (separated by a `;`).
* The whole thing is within a `for` loop and `break` statements are
used. In this case, using `switch` would require tagging the `for`
loop, which probably tips the balance.
Why are `switch` statements more readable?
For one, fewer curly braces. But more importantly, the conditions all
have the same alignment, so the whole thing follows the natural flow
of going down a list of conditions. With `else if`, in contrast, all
conditions but the first are "hidden" behind `} else if `, harder to
spot and (for no good reason) presented differently from the first
condition.
I'm sure the aforemention wise coders can list even more reasons.
In any case, I like it so much that I have found myself recommending
it in code reviews. I would like to make it a habit in our code base,
without making it a hard requirement that we would test on the CI. But
for that, there has to be a role model, so this commit eliminates all
`if else` occurrences, unless it is autogenerated code or fits one of
the exceptions above.
Signed-off-by: beorn7 <beorn@grafana.com>
2023-04-12 07:14:31 -07:00
switch {
case t < b . minT :
2022-09-20 10:05:50 -07:00
continue
style: Replace `else if` cascades with `switch`
Wiser coders than myself have come to the conclusion that a `switch`
statement is almost always superior to a statement that includes any
`else if`.
The exceptions that I have found in our codebase are just these two:
* The `if else` is followed by an additional statement before the next
condition (separated by a `;`).
* The whole thing is within a `for` loop and `break` statements are
used. In this case, using `switch` would require tagging the `for`
loop, which probably tips the balance.
Why are `switch` statements more readable?
For one, fewer curly braces. But more importantly, the conditions all
have the same alignment, so the whole thing follows the natural flow
of going down a list of conditions. With `else if`, in contrast, all
conditions but the first are "hidden" behind `} else if `, harder to
spot and (for no good reason) presented differently from the first
condition.
I'm sure the aforemention wise coders can list even more reasons.
In any case, I like it so much that I have found myself recommending
it in code reviews. I would like to make it a habit in our code base,
without making it a hard requirement that we would test on the CI. But
for that, there has to be a role model, so this commit eliminates all
`if else` occurrences, unless it is autogenerated code or fits one of
the exceptions above.
Signed-off-by: beorn7 <beorn@grafana.com>
2023-04-12 07:14:31 -07:00
case t > b . maxT :
2022-10-11 09:35:35 -07:00
return chunkenc . ValNone
style: Replace `else if` cascades with `switch`
Wiser coders than myself have come to the conclusion that a `switch`
statement is almost always superior to a statement that includes any
`else if`.
The exceptions that I have found in our codebase are just these two:
* The `if else` is followed by an additional statement before the next
condition (separated by a `;`).
* The whole thing is within a `for` loop and `break` statements are
used. In this case, using `switch` would require tagging the `for`
loop, which probably tips the balance.
Why are `switch` statements more readable?
For one, fewer curly braces. But more importantly, the conditions all
have the same alignment, so the whole thing follows the natural flow
of going down a list of conditions. With `else if`, in contrast, all
conditions but the first are "hidden" behind `} else if `, harder to
spot and (for no good reason) presented differently from the first
condition.
I'm sure the aforemention wise coders can list even more reasons.
In any case, I like it so much that I have found myself recommending
it in code reviews. I would like to make it a habit in our code base,
without making it a hard requirement that we would test on the CI. But
for that, there has to be a role model, so this commit eliminates all
`if else` occurrences, unless it is autogenerated code or fits one of
the exceptions above.
Signed-off-by: beorn7 <beorn@grafana.com>
2023-04-12 07:14:31 -07:00
default :
return chunkenc . ValFloat
2022-09-20 10:05:50 -07:00
}
}
2022-10-11 09:35:35 -07:00
return chunkenc . ValNone
2022-09-20 10:05:50 -07:00
}
2022-10-11 09:35:35 -07:00
func ( b boundedIterator ) Seek ( t int64 ) chunkenc . ValueType {
2022-09-20 10:05:50 -07:00
if t < b . minT {
// We must seek at least up to b.minT if it is asked for something before that.
2022-10-11 09:35:35 -07:00
val := b . Iterator . Seek ( b . minT )
if ! ( val == chunkenc . ValFloat ) {
return chunkenc . ValNone
2022-09-20 10:05:50 -07:00
}
t , _ := b . Iterator . At ( )
2022-10-11 09:35:35 -07:00
if t <= b . maxT {
return chunkenc . ValFloat
}
2022-09-20 10:05:50 -07:00
}
if t > b . maxT {
// We seek anyway so that the subsequent Next() calls will also return false.
b . Iterator . Seek ( t )
2022-10-11 09:35:35 -07:00
return chunkenc . ValNone
2022-09-20 10:05:50 -07:00
}
return b . Iterator . Seek ( t )
}
2023-10-03 13:09:25 -07:00
// safeHeadChunk makes sure that the chunk can be accessed without a race condition.
2023-05-19 01:24:06 -07:00
type safeHeadChunk struct {
2021-08-03 05:14:26 -07:00
chunkenc . Chunk
2023-02-21 01:30:11 -08:00
s * memSeries
cid chunks . HeadChunkID
isoState * isolationState
2021-08-03 05:14:26 -07:00
}
2023-05-19 01:24:06 -07:00
func ( c * safeHeadChunk ) Iterator ( reuseIter chunkenc . Iterator ) chunkenc . Iterator {
2021-08-03 05:14:26 -07:00
c . s . Lock ( )
2023-02-21 01:30:11 -08:00
it := c . s . iterator ( c . cid , c . Chunk , c . isoState , reuseIter )
2021-08-03 05:14:26 -07:00
c . s . Unlock ( )
return it
}
2021-11-17 05:05:10 -08:00
// iterator returns a chunk iterator for the requested chunkID, or a NopIterator if the requested ID is out of range.
2021-08-03 05:14:26 -07:00
// It is unsafe to call this concurrently with s.append(...) without holding the series lock.
2023-02-21 01:30:11 -08:00
func ( s * memSeries ) iterator ( id chunks . HeadChunkID , c chunkenc . Chunk , isoState * isolationState , it chunkenc . Iterator ) chunkenc . Iterator {
2021-11-17 05:05:10 -08:00
ix := int ( id ) - int ( s . firstChunkID )
2021-08-03 05:14:26 -07:00
2023-02-21 01:30:11 -08:00
numSamples := c . NumSamples ( )
2021-08-03 05:14:26 -07:00
stopAfter := numSamples
2021-11-19 02:11:32 -08:00
if isoState != nil && ! isoState . IsolationDisabled ( ) {
2021-08-03 05:14:26 -07:00
totalSamples := 0 // Total samples in this series.
previousSamples := 0 // Samples before this chunk.
for j , d := range s . mmappedChunks {
totalSamples += int ( d . numSamples )
if j < ix {
previousSamples += int ( d . numSamples )
}
}
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 02:10:24 -07:00
ix -= len ( s . mmappedChunks )
if s . headChunks != nil {
// Iterate all head chunks from the oldest to the newest.
headChunksLen := s . headChunks . len ( )
for j := headChunksLen - 1 ; j >= 0 ; j -- {
chk := s . headChunks . atOffset ( j )
chkSamples := chk . chunk . NumSamples ( )
totalSamples += chkSamples
// Chunk ID is len(s.mmappedChunks) + $(headChunks list position).
// Where $(headChunks list position) is zero for the oldest chunk and $(s.headChunks.len() - 1)
// for the newest (open) chunk.
if headChunksLen - 1 - j < ix {
previousSamples += chkSamples
}
}
2021-08-03 05:14:26 -07:00
}
// Removing the extra transactionIDs that are relevant for samples that
// come after this chunk, from the total transactionIDs.
2023-10-21 05:38:01 -07:00
appendIDsToConsider := int ( s . txs . txIDCount ) - ( totalSamples - ( previousSamples + numSamples ) )
2021-08-03 05:14:26 -07:00
// Iterate over the appendIDs, find the first one that the isolation state says not
// to return.
it := s . txs . iterator ( )
for index := 0 ; index < appendIDsToConsider ; index ++ {
appendID := it . At ( )
if appendID <= isoState . maxAppendID { // Easy check first.
if _ , ok := isoState . incompleteAppends [ appendID ] ; ! ok {
it . Next ( )
continue
}
}
stopAfter = numSamples - ( appendIDsToConsider - index )
if stopAfter < 0 {
stopAfter = 0 // Stopped in a previous chunk.
}
break
}
}
if stopAfter == 0 {
return chunkenc . NewNopIterator ( )
}
2022-09-27 07:02:05 -07:00
if stopAfter == numSamples {
2023-02-21 01:30:11 -08:00
return c . Iterator ( it )
2021-08-03 05:14:26 -07:00
}
2023-02-21 01:30:11 -08:00
return makeStopIterator ( c , it , stopAfter )
2021-08-03 05:14:26 -07:00
}
2021-11-17 02:21:27 -08:00
// stopIterator wraps an Iterator, but only returns the first
// stopAfter values, if initialized with i=-1.
2021-08-03 05:14:26 -07:00
type stopIterator struct {
chunkenc . Iterator
i , stopAfter int
}
2021-11-28 23:54:23 -08:00
func ( it * stopIterator ) Next ( ) chunkenc . ValueType {
2021-08-03 05:14:26 -07:00
if it . i + 1 >= it . stopAfter {
2021-11-28 23:54:23 -08:00
return chunkenc . ValNone
2021-08-03 05:14:26 -07:00
}
it . i ++
return it . Iterator . Next ( )
}
2022-10-05 13:14:49 -07:00
func makeStopIterator ( c chunkenc . Chunk , it chunkenc . Iterator , stopAfter int ) chunkenc . Iterator {
// Re-use the Iterator object if it is a stopIterator.
if stopIter , ok := it . ( * stopIterator ) ; ok {
stopIter . Iterator = c . Iterator ( stopIter . Iterator )
stopIter . i = - 1
stopIter . stopAfter = stopAfter
return stopIter
}
return & stopIterator {
Iterator : c . Iterator ( it ) ,
i : - 1 ,
stopAfter : stopAfter ,
}
}