2017-05-26 08:56:31 -07:00
// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2017-05-16 00:13:33 -07:00
package tsdb
2017-11-21 03:15:02 -08:00
import (
2018-12-05 08:34:42 -08:00
"context"
2019-03-24 13:33:08 -07:00
"encoding/binary"
"errors"
2021-02-09 09:38:35 -08:00
"fmt"
2019-12-03 23:37:49 -08:00
"hash/crc32"
2018-10-23 14:35:52 -07:00
"math/rand"
2017-11-21 03:15:02 -08:00
"os"
2017-11-30 06:34:49 -08:00
"path/filepath"
2021-02-09 09:38:35 -08:00
"sort"
2019-05-27 04:24:46 -07:00
"strconv"
2017-11-21 03:15:02 -08:00
"testing"
2017-12-06 17:06:14 -08:00
2021-06-11 09:17:59 -07:00
"github.com/go-kit/log"
2022-09-20 10:05:50 -07:00
prom_testutil "github.com/prometheus/client_golang/prometheus/testutil"
2020-10-29 02:43:23 -07:00
"github.com/stretchr/testify/require"
2020-10-22 02:00:08 -07:00
2022-08-29 03:05:03 -07:00
"github.com/prometheus/prometheus/model/histogram"
2021-11-08 06:23:17 -08:00
"github.com/prometheus/prometheus/model/labels"
2020-02-06 07:58:38 -08:00
"github.com/prometheus/prometheus/storage"
2021-11-28 23:54:23 -08:00
"github.com/prometheus/prometheus/tsdb/chunkenc"
2019-08-13 01:34:14 -07:00
"github.com/prometheus/prometheus/tsdb/chunks"
2019-11-18 11:53:33 -08:00
"github.com/prometheus/prometheus/tsdb/fileutil"
2022-10-10 08:08:46 -07:00
"github.com/prometheus/prometheus/tsdb/wlog"
2017-11-21 03:15:02 -08:00
)
2018-02-12 02:40:12 -08:00
// In Prometheus 2.1.0 we had a bug where the meta.json version was falsely bumped
// to 2. We had a migration in place resetting it to 1 but we should move immediately to
// version 3 next time to avoid confusion and issues.
func TestBlockMetaMustNeverBeVersion2 ( t * testing . T ) {
2022-01-22 01:55:01 -08:00
dir := t . TempDir ( )
2018-02-12 02:40:12 -08:00
2022-01-22 01:55:01 -08:00
_ , err := writeMetaFile ( log . NewNopLogger ( ) , dir , & BlockMeta { } )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2018-02-12 02:40:12 -08:00
2019-06-24 08:42:29 -07:00
meta , _ , err := readMetaFile ( dir )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
require . NotEqual ( t , 2 , meta . Version , "meta.json version must never be 2" )
2018-02-12 02:40:12 -08:00
}
2017-11-21 03:15:02 -08:00
func TestSetCompactionFailed ( t * testing . T ) {
2022-01-22 01:55:01 -08:00
tmpdir := t . TempDir ( )
2017-11-21 03:15:02 -08:00
2019-07-03 03:47:31 -07:00
blockDir := createBlock ( t , tmpdir , genSeries ( 1 , 1 , 0 , 1 ) )
2019-01-16 02:03:52 -08:00
b , err := OpenBlock ( nil , blockDir , nil )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
require . Equal ( t , false , b . meta . Compaction . Failed )
require . NoError ( t , b . setCompactionFailed ( ) )
require . Equal ( t , true , b . meta . Compaction . Failed )
require . NoError ( t , b . Close ( ) )
2017-11-21 03:15:02 -08:00
2019-01-16 02:03:52 -08:00
b , err = OpenBlock ( nil , blockDir , nil )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
require . Equal ( t , true , b . meta . Compaction . Failed )
require . NoError ( t , b . Close ( ) )
2017-11-21 03:15:02 -08:00
}
2019-03-19 06:31:57 -07:00
func TestCreateBlock ( t * testing . T ) {
2022-01-22 01:55:01 -08:00
tmpdir := t . TempDir ( )
2019-03-19 06:31:57 -07:00
b , err := OpenBlock ( nil , createBlock ( t , tmpdir , genSeries ( 1 , 1 , 0 , 10 ) ) , nil )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2022-11-14 08:48:16 -08:00
require . NoError ( t , b . Close ( ) )
}
func BenchmarkOpenBlock ( b * testing . B ) {
tmpdir := b . TempDir ( )
blockDir := createBlock ( b , tmpdir , genSeries ( 1e6 , 20 , 0 , 10 ) )
b . Run ( "benchmark" , func ( b * testing . B ) {
for i := 0 ; i < b . N ; i ++ {
block , err := OpenBlock ( nil , blockDir , nil )
require . NoError ( b , err )
require . NoError ( b , block . Close ( ) )
}
} )
2019-03-19 06:31:57 -07:00
}
2019-03-24 13:33:08 -07:00
func TestCorruptedChunk ( t * testing . T ) {
2020-07-31 08:03:02 -07:00
for _ , tc := range [ ] struct {
name string
2019-03-24 13:33:08 -07:00
corrFunc func ( f * os . File ) // Func that applies the corruption.
2019-12-03 23:37:49 -08:00
openErr error
2020-07-31 08:03:02 -07:00
iterErr error
2019-03-24 13:33:08 -07:00
} {
2020-07-31 08:03:02 -07:00
{
name : "invalid header size" ,
corrFunc : func ( f * os . File ) {
2020-10-29 02:43:23 -07:00
require . NoError ( t , f . Truncate ( 1 ) )
2019-03-24 13:33:08 -07:00
} ,
2020-07-31 08:03:02 -07:00
openErr : errors . New ( "invalid segment header in segment 0: invalid size" ) ,
2019-03-24 13:33:08 -07:00
} ,
2020-07-31 08:03:02 -07:00
{
name : "invalid magic number" ,
corrFunc : func ( f * os . File ) {
2019-03-24 13:33:08 -07:00
magicChunksOffset := int64 ( 0 )
_ , err := f . Seek ( magicChunksOffset , 0 )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2019-03-24 13:33:08 -07:00
// Set invalid magic number.
b := make ( [ ] byte , chunks . MagicChunksSize )
binary . BigEndian . PutUint32 ( b [ : chunks . MagicChunksSize ] , 0x00000000 )
n , err := f . Write ( b )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
require . Equal ( t , chunks . MagicChunksSize , n )
2019-03-24 13:33:08 -07:00
} ,
2020-07-31 08:03:02 -07:00
openErr : errors . New ( "invalid magic number 0" ) ,
2019-03-24 13:33:08 -07:00
} ,
2020-07-31 08:03:02 -07:00
{
name : "invalid chunk format version" ,
corrFunc : func ( f * os . File ) {
2019-03-24 13:33:08 -07:00
chunksFormatVersionOffset := int64 ( 4 )
_ , err := f . Seek ( chunksFormatVersionOffset , 0 )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2019-03-24 13:33:08 -07:00
// Set invalid chunk format version.
b := make ( [ ] byte , chunks . ChunksFormatVersionSize )
b [ 0 ] = 0
n , err := f . Write ( b )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
require . Equal ( t , chunks . ChunksFormatVersionSize , n )
2019-03-24 13:33:08 -07:00
} ,
2020-07-31 08:03:02 -07:00
openErr : errors . New ( "invalid chunk format version 0" ) ,
2019-12-03 23:37:49 -08:00
} ,
2020-07-31 08:03:02 -07:00
{
name : "chunk not enough bytes to read the chunk length" ,
corrFunc : func ( f * os . File ) {
2019-12-03 23:37:49 -08:00
// Truncate one byte after the segment header.
2020-10-29 02:43:23 -07:00
require . NoError ( t , f . Truncate ( chunks . SegmentHeaderSize + 1 ) )
2019-12-03 23:37:49 -08:00
} ,
2022-11-28 00:12:54 -08:00
iterErr : errors . New ( "cannot populate chunk 8 from block 00000000000000000000000000: segment doesn't include enough bytes to read the chunk size data field - required:13, available:9" ) ,
2019-12-03 23:37:49 -08:00
} ,
2020-07-31 08:03:02 -07:00
{
name : "chunk not enough bytes to read the data" ,
corrFunc : func ( f * os . File ) {
2019-12-03 23:37:49 -08:00
fi , err := f . Stat ( )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
require . NoError ( t , f . Truncate ( fi . Size ( ) - 1 ) )
2019-12-03 23:37:49 -08:00
} ,
2022-11-28 00:12:54 -08:00
iterErr : errors . New ( "cannot populate chunk 8 from block 00000000000000000000000000: segment doesn't include enough bytes to read the chunk - required:26, available:25" ) ,
2019-12-03 23:37:49 -08:00
} ,
2020-07-31 08:03:02 -07:00
{
name : "checksum mismatch" ,
corrFunc : func ( f * os . File ) {
2019-12-03 23:37:49 -08:00
fi , err := f . Stat ( )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2019-12-03 23:37:49 -08:00
// Get the chunk data end offset.
chkEndOffset := int ( fi . Size ( ) ) - crc32 . Size
// Seek to the last byte of chunk data and modify it.
_ , err = f . Seek ( int64 ( chkEndOffset - 1 ) , 0 )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2019-12-03 23:37:49 -08:00
n , err := f . Write ( [ ] byte ( "x" ) )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
require . Equal ( t , n , 1 )
2019-12-03 23:37:49 -08:00
} ,
2022-11-28 00:12:54 -08:00
iterErr : errors . New ( "cannot populate chunk 8 from block 00000000000000000000000000: checksum mismatch expected:cfc0526c, actual:34815eae" ) ,
2019-03-24 13:33:08 -07:00
} ,
} {
2020-07-31 08:03:02 -07:00
t . Run ( tc . name , func ( t * testing . T ) {
2022-01-22 01:55:01 -08:00
tmpdir := t . TempDir ( )
2019-03-24 13:33:08 -07:00
2023-08-24 06:21:17 -07:00
series := storage . NewListSeries ( labels . FromStrings ( "a" , "b" ) , [ ] chunks . Sample { sample { 1 , 1 , nil , nil } } )
2020-02-06 07:58:38 -08:00
blockDir := createBlock ( t , tmpdir , [ ] storage . Series { series } )
2019-03-24 13:33:08 -07:00
files , err := sequenceFiles ( chunkDir ( blockDir ) )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
require . Greater ( t , len ( files ) , 0 , "No chunk created." )
2019-03-24 13:33:08 -07:00
2021-10-22 01:06:44 -07:00
f , err := os . OpenFile ( files [ 0 ] , os . O_RDWR , 0 o666 )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2019-03-24 13:33:08 -07:00
// Apply corruption function.
2020-07-31 08:03:02 -07:00
tc . corrFunc ( f )
2020-10-29 02:43:23 -07:00
require . NoError ( t , f . Close ( ) )
2019-03-24 13:33:08 -07:00
2019-12-03 23:37:49 -08:00
// Check open err.
b , err := OpenBlock ( nil , blockDir , nil )
2020-07-31 08:03:02 -07:00
if tc . openErr != nil {
2020-10-29 02:43:23 -07:00
require . Equal ( t , tc . openErr . Error ( ) , err . Error ( ) )
2019-12-03 23:37:49 -08:00
return
}
2020-10-29 02:43:23 -07:00
defer func ( ) { require . NoError ( t , b . Close ( ) ) } ( )
2019-12-03 23:37:49 -08:00
querier , err := NewBlockQuerier ( b , 0 , 1 )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
defer func ( ) { require . NoError ( t , querier . Close ( ) ) } ( )
2020-06-09 09:57:31 -07:00
set := querier . Select ( false , nil , labels . MustNewMatcher ( labels . MatchEqual , "a" , "b" ) )
2019-12-03 23:37:49 -08:00
2020-07-31 08:03:02 -07:00
// Check chunk errors during iter time.
2020-10-29 02:43:23 -07:00
require . True ( t , set . Next ( ) )
2022-09-20 10:16:45 -07:00
it := set . At ( ) . Iterator ( nil )
2021-11-28 23:54:23 -08:00
require . Equal ( t , chunkenc . ValNone , it . Next ( ) )
2020-10-29 02:43:23 -07:00
require . Equal ( t , tc . iterErr . Error ( ) , it . Err ( ) . Error ( ) )
2019-03-24 13:33:08 -07:00
} )
}
}
2021-02-09 09:38:35 -08:00
func TestLabelValuesWithMatchers ( t * testing . T ) {
2022-01-22 01:55:01 -08:00
tmpdir := t . TempDir ( )
2021-02-09 09:38:35 -08:00
var seriesEntries [ ] storage . Series
for i := 0 ; i < 100 ; i ++ {
2022-03-09 14:17:29 -08:00
seriesEntries = append ( seriesEntries , storage . NewListSeries ( labels . FromStrings (
"tens" , fmt . Sprintf ( "value%d" , i / 10 ) ,
"unique" , fmt . Sprintf ( "value%d" , i ) ,
2023-08-24 06:21:17 -07:00
) , [ ] chunks . Sample { sample { 100 , 0 , nil , nil } } ) )
2021-02-09 09:38:35 -08:00
}
blockDir := createBlock ( t , tmpdir , seriesEntries )
files , err := sequenceFiles ( chunkDir ( blockDir ) )
require . NoError ( t , err )
require . Greater ( t , len ( files ) , 0 , "No chunk created." )
// Check open err.
block , err := OpenBlock ( nil , blockDir , nil )
require . NoError ( t , err )
defer func ( ) { require . NoError ( t , block . Close ( ) ) } ( )
indexReader , err := block . Index ( )
require . NoError ( t , err )
defer func ( ) { require . NoError ( t , indexReader . Close ( ) ) } ( )
testCases := [ ] struct {
name string
labelName string
matchers [ ] * labels . Matcher
expectedValues [ ] string
} {
{
name : "get tens based on unique id" ,
labelName : "tens" ,
matchers : [ ] * labels . Matcher { labels . MustNewMatcher ( labels . MatchEqual , "unique" , "value35" ) } ,
expectedValues : [ ] string { "value3" } ,
} , {
name : "get unique ids based on a ten" ,
labelName : "unique" ,
matchers : [ ] * labels . Matcher { labels . MustNewMatcher ( labels . MatchEqual , "tens" , "value1" ) } ,
expectedValues : [ ] string { "value10" , "value11" , "value12" , "value13" , "value14" , "value15" , "value16" , "value17" , "value18" , "value19" } ,
} , {
name : "get tens by pattern matching on unique id" ,
labelName : "tens" ,
matchers : [ ] * labels . Matcher { labels . MustNewMatcher ( labels . MatchRegexp , "unique" , "value[5-7]5" ) } ,
expectedValues : [ ] string { "value5" , "value6" , "value7" } ,
} , {
name : "get tens by matching for absence of unique label" ,
labelName : "tens" ,
matchers : [ ] * labels . Matcher { labels . MustNewMatcher ( labels . MatchNotEqual , "unique" , "" ) } ,
expectedValues : [ ] string { "value0" , "value1" , "value2" , "value3" , "value4" , "value5" , "value6" , "value7" , "value8" , "value9" } ,
} ,
}
for _ , tt := range testCases {
t . Run ( tt . name , func ( t * testing . T ) {
actualValues , err := indexReader . SortedLabelValues ( tt . labelName , tt . matchers ... )
require . NoError ( t , err )
require . Equal ( t , tt . expectedValues , actualValues )
actualValues , err = indexReader . LabelValues ( tt . labelName , tt . matchers ... )
sort . Strings ( actualValues )
require . NoError ( t , err )
require . Equal ( t , tt . expectedValues , actualValues )
} )
}
}
2019-06-24 08:42:29 -07:00
// TestBlockSize ensures that the block size is calculated correctly.
func TestBlockSize ( t * testing . T ) {
2022-01-22 01:55:01 -08:00
tmpdir := t . TempDir ( )
2019-06-24 08:42:29 -07:00
var (
blockInit * Block
expSizeInit int64
blockDirInit string
2022-01-22 01:55:01 -08:00
err error
2019-06-24 08:42:29 -07:00
)
// Create a block and compare the reported size vs actual disk size.
{
blockDirInit = createBlock ( t , tmpdir , genSeries ( 10 , 1 , 1 , 100 ) )
blockInit , err = OpenBlock ( nil , blockDirInit , nil )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2019-06-24 08:42:29 -07:00
defer func ( ) {
2020-10-29 02:43:23 -07:00
require . NoError ( t , blockInit . Close ( ) )
2019-06-24 08:42:29 -07:00
} ( )
expSizeInit = blockInit . Size ( )
2019-11-11 18:40:16 -08:00
actSizeInit , err := fileutil . DirSize ( blockInit . Dir ( ) )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
require . Equal ( t , expSizeInit , actSizeInit )
2019-06-24 08:42:29 -07:00
}
// Delete some series and check the sizes again.
{
2020-10-29 02:43:23 -07:00
require . NoError ( t , blockInit . Delete ( 1 , 10 , labels . MustNewMatcher ( labels . MatchRegexp , "" , ".*" ) ) )
2019-06-24 08:42:29 -07:00
expAfterDelete := blockInit . Size ( )
2020-10-29 02:43:23 -07:00
require . Greater ( t , expAfterDelete , expSizeInit , "after a delete the block size should be bigger as the tombstone file should grow %v > %v" , expAfterDelete , expSizeInit )
2019-11-11 18:40:16 -08:00
actAfterDelete , err := fileutil . DirSize ( blockDirInit )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
require . Equal ( t , expAfterDelete , actAfterDelete , "after a delete reported block size doesn't match actual disk size" )
2019-06-24 08:42:29 -07:00
2021-05-18 09:38:37 -07:00
c , err := NewLeveledCompactor ( context . Background ( ) , nil , log . NewNopLogger ( ) , [ ] int64 { 0 } , nil , nil )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2019-06-24 08:42:29 -07:00
blockDirAfterCompact , err := c . Compact ( tmpdir , [ ] string { blockInit . Dir ( ) } , nil )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2019-06-24 08:42:29 -07:00
blockAfterCompact , err := OpenBlock ( nil , filepath . Join ( tmpdir , blockDirAfterCompact . String ( ) ) , nil )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2019-06-24 08:42:29 -07:00
defer func ( ) {
2020-10-29 02:43:23 -07:00
require . NoError ( t , blockAfterCompact . Close ( ) )
2019-06-24 08:42:29 -07:00
} ( )
expAfterCompact := blockAfterCompact . Size ( )
2019-11-11 18:40:16 -08:00
actAfterCompact , err := fileutil . DirSize ( blockAfterCompact . Dir ( ) )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
require . Greater ( t , actAfterDelete , actAfterCompact , "after a delete and compaction the block size should be smaller %v,%v" , actAfterDelete , actAfterCompact )
require . Equal ( t , expAfterCompact , actAfterCompact , "after a delete and compaction reported block size doesn't match actual disk size" )
2019-06-24 08:42:29 -07:00
}
}
Load only some offsets into the symbol table into memory.
Rather than keeping the entire symbol table in memory, keep every nth
offset and walk from there to the entry we need. This ends up slightly
slower, ~360ms per 1M series returned from PostingsForMatchers which is
not much considering the rest of the CPU such a query would go on to
use.
Make LabelValues use the postings tables, rather than having
to do symbol lookups. Use yoloString, as PostingsForMatchers
doesn't need the strings to stick around and adjust the API
call to keep the Querier open until it's all marshalled.
Remove allocatedSymbols memory optimisation, we no longer keep all the
symbol strings in heap memory. Remove LabelValuesFor and LabelIndices,
they're dead code. Ensure we still have tests for label indices,
and add missing test that we can work with old V1 Format index files.
PostingsForMatchers performance is slightly better, with a big drop in
allocation counts due to using yoloString for LabelValues:
benchmark old ns/op new ns/op delta
BenchmarkPostingsForMatchers/Block/n="1"-4 36698 36681 -0.05%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 522786 560887 +7.29%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 511652 537680 +5.09%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 522102 564239 +8.07%
BenchmarkPostingsForMatchers/Block/i=~".*"-4 113689911 111795919 -1.67%
BenchmarkPostingsForMatchers/Block/i=~".+"-4 135825572 132871085 -2.18%
BenchmarkPostingsForMatchers/Block/i=~""-4 40782628 38038181 -6.73%
BenchmarkPostingsForMatchers/Block/i!=""-4 31267869 29194327 -6.63%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 112733329 111568823 -1.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 112868153 111232029 -1.45%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 31338257 29349446 -6.35%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 32054482 29972436 -6.50%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 136504654 133968442 -1.86%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 27960350 27264997 -2.49%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 136765564 133860724 -2.12%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 163714583 159453668 -2.60%
benchmark old allocs new allocs delta
BenchmarkPostingsForMatchers/Block/n="1"-4 6 6 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 11 11 +0.00%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 11 11 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 17 15 -11.76%
BenchmarkPostingsForMatchers/Block/i=~".*"-4 100012 12 -99.99%
BenchmarkPostingsForMatchers/Block/i=~".+"-4 200040 100040 -49.99%
BenchmarkPostingsForMatchers/Block/i=~""-4 200045 100045 -49.99%
BenchmarkPostingsForMatchers/Block/i!=""-4 200041 100041 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 100017 17 -99.98%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 100023 23 -99.98%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 200046 100046 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 200050 100050 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 200049 100049 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 111150 11150 -89.97%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 200055 100055 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 311238 111238 -64.26%
benchmark old bytes new bytes delta
BenchmarkPostingsForMatchers/Block/n="1"-4 296 296 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 424 424 +0.00%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 424 424 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 552 1544 +179.71%
BenchmarkPostingsForMatchers/Block/i=~".*"-4 1600482 1606125 +0.35%
BenchmarkPostingsForMatchers/Block/i=~".+"-4 17259065 17264709 +0.03%
BenchmarkPostingsForMatchers/Block/i=~""-4 17259150 17264780 +0.03%
BenchmarkPostingsForMatchers/Block/i!=""-4 17259048 17264680 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 1600610 1606242 +0.35%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 1600813 1606434 +0.35%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 17259176 17264808 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 17259304 17264936 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 17259333 17264965 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 3142628 3148262 +0.18%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 17259509 17265141 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 20405680 20416944 +0.06%
However overall Select performance is down and involves more allocs, due to
having to do more than a simple map lookup to resolve a symbol and that all the strings
returned are allocated:
benchmark old ns/op new ns/op delta
BenchmarkQuerierSelect/Block/1of1000000-4 506092636 862678244 +70.46%
BenchmarkQuerierSelect/Block/10of1000000-4 505638968 860917636 +70.26%
BenchmarkQuerierSelect/Block/100of1000000-4 505229450 882150048 +74.60%
BenchmarkQuerierSelect/Block/1000of1000000-4 515905414 862241115 +67.13%
BenchmarkQuerierSelect/Block/10000of1000000-4 516785354 874841110 +69.29%
BenchmarkQuerierSelect/Block/100000of1000000-4 540742808 907030187 +67.74%
BenchmarkQuerierSelect/Block/1000000of1000000-4 815224288 1181236903 +44.90%
benchmark old allocs new allocs delta
BenchmarkQuerierSelect/Block/1of1000000-4 4000020 6000020 +50.00%
BenchmarkQuerierSelect/Block/10of1000000-4 4000038 6000038 +50.00%
BenchmarkQuerierSelect/Block/100of1000000-4 4000218 6000218 +50.00%
BenchmarkQuerierSelect/Block/1000of1000000-4 4002018 6002018 +49.97%
BenchmarkQuerierSelect/Block/10000of1000000-4 4020018 6020018 +49.75%
BenchmarkQuerierSelect/Block/100000of1000000-4 4200018 6200018 +47.62%
BenchmarkQuerierSelect/Block/1000000of1000000-4 6000018 8000019 +33.33%
benchmark old bytes new bytes delta
BenchmarkQuerierSelect/Block/1of1000000-4 176001468 227201476 +29.09%
BenchmarkQuerierSelect/Block/10of1000000-4 176002620 227202628 +29.09%
BenchmarkQuerierSelect/Block/100of1000000-4 176014140 227214148 +29.09%
BenchmarkQuerierSelect/Block/1000of1000000-4 176129340 227329348 +29.07%
BenchmarkQuerierSelect/Block/10000of1000000-4 177281340 228481348 +28.88%
BenchmarkQuerierSelect/Block/100000of1000000-4 188801340 240001348 +27.12%
BenchmarkQuerierSelect/Block/1000000of1000000-4 304001340 355201616 +16.84%
Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2019-12-12 08:55:32 -08:00
func TestReadIndexFormatV1 ( t * testing . T ) {
2020-01-06 06:06:11 -08:00
/ * The block here was produced at the commit
706602 daed1487f7849990678b4ece4599745905 used in 2.0 .0 with :
db , _ := Open ( "v1db" , nil , nil , nil )
app := db . Appender ( )
app . Add ( labels . FromStrings ( "foo" , "bar" ) , 1 , 2 )
app . Add ( labels . FromStrings ( "foo" , "baz" ) , 3 , 4 )
app . Add ( labels . FromStrings ( "foo" , "meh" ) , 1000 * 3600 * 4 , 4 ) // Not in the block.
// Make sure we've enough values for the lack of sorting of postings offsets to show up.
for i := 0 ; i < 100 ; i ++ {
app . Add ( labels . FromStrings ( "bar" , strconv . FormatInt ( int64 ( i ) , 10 ) ) , 0 , 0 )
}
app . Commit ( )
db . compact ( )
db . Close ( )
Load only some offsets into the symbol table into memory.
Rather than keeping the entire symbol table in memory, keep every nth
offset and walk from there to the entry we need. This ends up slightly
slower, ~360ms per 1M series returned from PostingsForMatchers which is
not much considering the rest of the CPU such a query would go on to
use.
Make LabelValues use the postings tables, rather than having
to do symbol lookups. Use yoloString, as PostingsForMatchers
doesn't need the strings to stick around and adjust the API
call to keep the Querier open until it's all marshalled.
Remove allocatedSymbols memory optimisation, we no longer keep all the
symbol strings in heap memory. Remove LabelValuesFor and LabelIndices,
they're dead code. Ensure we've still tests for label indices,
and add missing test that we can work with old V1 Format index files.
PostingForMatchers performance is slightly better, with a big drop in
allocation counts due to using yoloString for LabelValues:
benchmark old ns/op new ns/op delta
BenchmarkPostingsForMatchers/Block/n="1"-4 36698 36681 -0.05%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 522786 560887 +7.29%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 511652 537680 +5.09%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 522102 564239 +8.07%
BenchmarkPostingsForMatchers/Block/i=~".*"-4 113689911 111795919 -1.67%
BenchmarkPostingsForMatchers/Block/i=~".+"-4 135825572 132871085 -2.18%
BenchmarkPostingsForMatchers/Block/i=~""-4 40782628 38038181 -6.73%
BenchmarkPostingsForMatchers/Block/i!=""-4 31267869 29194327 -6.63%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 112733329 111568823 -1.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 112868153 111232029 -1.45%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 31338257 29349446 -6.35%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 32054482 29972436 -6.50%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 136504654 133968442 -1.86%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 27960350 27264997 -2.49%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 136765564 133860724 -2.12%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 163714583 159453668 -2.60%
benchmark old allocs new allocs delta
BenchmarkPostingsForMatchers/Block/n="1"-4 6 6 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 11 11 +0.00%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 11 11 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 17 15 -11.76%
BenchmarkPostingsForMatchers/Block/i=~".*"-4 100012 12 -99.99%
BenchmarkPostingsForMatchers/Block/i=~".+"-4 200040 100040 -49.99%
BenchmarkPostingsForMatchers/Block/i=~""-4 200045 100045 -49.99%
BenchmarkPostingsForMatchers/Block/i!=""-4 200041 100041 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 100017 17 -99.98%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 100023 23 -99.98%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 200046 100046 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 200050 100050 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 200049 100049 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 111150 11150 -89.97%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 200055 100055 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 311238 111238 -64.26%
benchmark old bytes new bytes delta
BenchmarkPostingsForMatchers/Block/n="1"-4 296 296 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 424 424 +0.00%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 424 424 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 552 1544 +179.71%
BenchmarkPostingsForMatchers/Block/i=~".*"-4 1600482 1606125 +0.35%
BenchmarkPostingsForMatchers/Block/i=~".+"-4 17259065 17264709 +0.03%
BenchmarkPostingsForMatchers/Block/i=~""-4 17259150 17264780 +0.03%
BenchmarkPostingsForMatchers/Block/i!=""-4 17259048 17264680 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 1600610 1606242 +0.35%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 1600813 1606434 +0.35%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 17259176 17264808 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 17259304 17264936 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 17259333 17264965 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 3142628 3148262 +0.18%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 17259509 17265141 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 20405680 20416944 +0.06%
However overall Select performance is down and involves more allocs, due to
having to do more than a simple map lookup to resolve a symbol and that all the strings
returned are allocated:
benchmark old ns/op new ns/op delta
BenchmarkQuerierSelect/Block/1of1000000-4 506092636 862678244 +70.46%
BenchmarkQuerierSelect/Block/10of1000000-4 505638968 860917636 +70.26%
BenchmarkQuerierSelect/Block/100of1000000-4 505229450 882150048 +74.60%
BenchmarkQuerierSelect/Block/1000of1000000-4 515905414 862241115 +67.13%
BenchmarkQuerierSelect/Block/10000of1000000-4 516785354 874841110 +69.29%
BenchmarkQuerierSelect/Block/100000of1000000-4 540742808 907030187 +67.74%
BenchmarkQuerierSelect/Block/1000000of1000000-4 815224288 1181236903 +44.90%
benchmark old allocs new allocs delta
BenchmarkQuerierSelect/Block/1of1000000-4 4000020 6000020 +50.00%
BenchmarkQuerierSelect/Block/10of1000000-4 4000038 6000038 +50.00%
BenchmarkQuerierSelect/Block/100of1000000-4 4000218 6000218 +50.00%
BenchmarkQuerierSelect/Block/1000of1000000-4 4002018 6002018 +49.97%
BenchmarkQuerierSelect/Block/10000of1000000-4 4020018 6020018 +49.75%
BenchmarkQuerierSelect/Block/100000of1000000-4 4200018 6200018 +47.62%
BenchmarkQuerierSelect/Block/1000000of1000000-4 6000018 8000019 +33.33%
benchmark old bytes new bytes delta
BenchmarkQuerierSelect/Block/1of1000000-4 176001468 227201476 +29.09%
BenchmarkQuerierSelect/Block/10of1000000-4 176002620 227202628 +29.09%
BenchmarkQuerierSelect/Block/100of1000000-4 176014140 227214148 +29.09%
BenchmarkQuerierSelect/Block/1000of1000000-4 176129340 227329348 +29.07%
BenchmarkQuerierSelect/Block/10000of1000000-4 177281340 228481348 +28.88%
BenchmarkQuerierSelect/Block/100000of1000000-4 188801340 240001348 +27.12%
BenchmarkQuerierSelect/Block/1000000of1000000-4 304001340 355201616 +16.84%
Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2019-12-12 08:55:32 -08:00
* /
blockDir := filepath . Join ( "testdata" , "index_format_v1" )
block , err := OpenBlock ( nil , blockDir , nil )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
Load only some offsets into the symbol table into memory.
Rather than keeping the entire symbol table in memory, keep every nth
offset and walk from there to the entry we need. This ends up slightly
slower, ~360ms per 1M series returned from PostingsForMatchers which is
not much considering the rest of the CPU such a query would go on to
use.
Make LabelValues use the postings tables, rather than having
to do symbol lookups. Use yoloString, as PostingsForMatchers
doesn't need the strings to stick around and adjust the API
call to keep the Querier open until it's all marshalled.
Remove allocatedSymbols memory optimisation, we no longer keep all the
symbol strings in heap memory. Remove LabelValuesFor and LabelIndices,
they're dead code. Ensure we've still tests for label indices,
and add missing test that we can work with old V1 Format index files.
PostingForMatchers performance is slightly better, with a big drop in
allocation counts due to using yoloString for LabelValues:
benchmark old ns/op new ns/op delta
BenchmarkPostingsForMatchers/Block/n="1"-4 36698 36681 -0.05%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 522786 560887 +7.29%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 511652 537680 +5.09%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 522102 564239 +8.07%
BenchmarkPostingsForMatchers/Block/i=~".*"-4 113689911 111795919 -1.67%
BenchmarkPostingsForMatchers/Block/i=~".+"-4 135825572 132871085 -2.18%
BenchmarkPostingsForMatchers/Block/i=~""-4 40782628 38038181 -6.73%
BenchmarkPostingsForMatchers/Block/i!=""-4 31267869 29194327 -6.63%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 112733329 111568823 -1.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 112868153 111232029 -1.45%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 31338257 29349446 -6.35%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 32054482 29972436 -6.50%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 136504654 133968442 -1.86%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 27960350 27264997 -2.49%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 136765564 133860724 -2.12%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 163714583 159453668 -2.60%
benchmark old allocs new allocs delta
BenchmarkPostingsForMatchers/Block/n="1"-4 6 6 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 11 11 +0.00%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 11 11 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 17 15 -11.76%
BenchmarkPostingsForMatchers/Block/i=~".*"-4 100012 12 -99.99%
BenchmarkPostingsForMatchers/Block/i=~".+"-4 200040 100040 -49.99%
BenchmarkPostingsForMatchers/Block/i=~""-4 200045 100045 -49.99%
BenchmarkPostingsForMatchers/Block/i!=""-4 200041 100041 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 100017 17 -99.98%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 100023 23 -99.98%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 200046 100046 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 200050 100050 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 200049 100049 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 111150 11150 -89.97%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 200055 100055 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 311238 111238 -64.26%
benchmark old bytes new bytes delta
BenchmarkPostingsForMatchers/Block/n="1"-4 296 296 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 424 424 +0.00%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 424 424 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 552 1544 +179.71%
BenchmarkPostingsForMatchers/Block/i=~".*"-4 1600482 1606125 +0.35%
BenchmarkPostingsForMatchers/Block/i=~".+"-4 17259065 17264709 +0.03%
BenchmarkPostingsForMatchers/Block/i=~""-4 17259150 17264780 +0.03%
BenchmarkPostingsForMatchers/Block/i!=""-4 17259048 17264680 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 1600610 1606242 +0.35%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 1600813 1606434 +0.35%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 17259176 17264808 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 17259304 17264936 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 17259333 17264965 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 3142628 3148262 +0.18%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 17259509 17265141 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 20405680 20416944 +0.06%
However overall Select performance is down and involves more allocs, due to
having to do more than a simple map lookup to resolve a symbol and that all the strings
returned are allocated:
benchmark old ns/op new ns/op delta
BenchmarkQuerierSelect/Block/1of1000000-4 506092636 862678244 +70.46%
BenchmarkQuerierSelect/Block/10of1000000-4 505638968 860917636 +70.26%
BenchmarkQuerierSelect/Block/100of1000000-4 505229450 882150048 +74.60%
BenchmarkQuerierSelect/Block/1000of1000000-4 515905414 862241115 +67.13%
BenchmarkQuerierSelect/Block/10000of1000000-4 516785354 874841110 +69.29%
BenchmarkQuerierSelect/Block/100000of1000000-4 540742808 907030187 +67.74%
BenchmarkQuerierSelect/Block/1000000of1000000-4 815224288 1181236903 +44.90%
benchmark old allocs new allocs delta
BenchmarkQuerierSelect/Block/1of1000000-4 4000020 6000020 +50.00%
BenchmarkQuerierSelect/Block/10of1000000-4 4000038 6000038 +50.00%
BenchmarkQuerierSelect/Block/100of1000000-4 4000218 6000218 +50.00%
BenchmarkQuerierSelect/Block/1000of1000000-4 4002018 6002018 +49.97%
BenchmarkQuerierSelect/Block/10000of1000000-4 4020018 6020018 +49.75%
BenchmarkQuerierSelect/Block/100000of1000000-4 4200018 6200018 +47.62%
BenchmarkQuerierSelect/Block/1000000of1000000-4 6000018 8000019 +33.33%
benchmark old bytes new bytes delta
BenchmarkQuerierSelect/Block/1of1000000-4 176001468 227201476 +29.09%
BenchmarkQuerierSelect/Block/10of1000000-4 176002620 227202628 +29.09%
BenchmarkQuerierSelect/Block/100of1000000-4 176014140 227214148 +29.09%
BenchmarkQuerierSelect/Block/1000of1000000-4 176129340 227329348 +29.07%
BenchmarkQuerierSelect/Block/10000of1000000-4 177281340 228481348 +28.88%
BenchmarkQuerierSelect/Block/100000of1000000-4 188801340 240001348 +27.12%
BenchmarkQuerierSelect/Block/1000000of1000000-4 304001340 355201616 +16.84%
Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2019-12-12 08:55:32 -08:00
q , err := NewBlockQuerier ( block , 0 , 1000 )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
require . Equal ( t , query ( t , q , labels . MustNewMatcher ( labels . MatchEqual , "foo" , "bar" ) ) ,
2023-08-24 06:21:17 -07:00
map [ string ] [ ] chunks . Sample { ` { foo="bar"} ` : { sample { t : 1 , f : 2 } } } )
Load only some offsets into the symbol table into memory.
Rather than keeping the entire symbol table in memory, keep every nth
offset and walk from there to the entry we need. This ends up slightly
slower, ~360ms per 1M series returned from PostingsForMatchers which is
not much considering the rest of the CPU such a query would go on to
use.
Make LabelValues use the postings tables, rather than having
to do symbol lookups. Use yoloString, as PostingsForMatchers
doesn't need the strings to stick around and adjust the API
call to keep the Querier open until it's all marshalled.
Remove allocatedSymbols memory optimisation, we no longer keep all the
symbol strings in heap memory. Remove LabelValuesFor and LabelIndices,
they're dead code. Ensure we've still tests for label indices,
and add missing test that we can work with old V1 Format index files.
PostingForMatchers performance is slightly better, with a big drop in
allocation counts due to using yoloString for LabelValues:
benchmark old ns/op new ns/op delta
BenchmarkPostingsForMatchers/Block/n="1"-4 36698 36681 -0.05%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 522786 560887 +7.29%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 511652 537680 +5.09%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 522102 564239 +8.07%
BenchmarkPostingsForMatchers/Block/i=~".*"-4 113689911 111795919 -1.67%
BenchmarkPostingsForMatchers/Block/i=~".+"-4 135825572 132871085 -2.18%
BenchmarkPostingsForMatchers/Block/i=~""-4 40782628 38038181 -6.73%
BenchmarkPostingsForMatchers/Block/i!=""-4 31267869 29194327 -6.63%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 112733329 111568823 -1.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 112868153 111232029 -1.45%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 31338257 29349446 -6.35%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 32054482 29972436 -6.50%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 136504654 133968442 -1.86%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 27960350 27264997 -2.49%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 136765564 133860724 -2.12%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 163714583 159453668 -2.60%
benchmark old allocs new allocs delta
BenchmarkPostingsForMatchers/Block/n="1"-4 6 6 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 11 11 +0.00%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 11 11 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 17 15 -11.76%
BenchmarkPostingsForMatchers/Block/i=~".*"-4 100012 12 -99.99%
BenchmarkPostingsForMatchers/Block/i=~".+"-4 200040 100040 -49.99%
BenchmarkPostingsForMatchers/Block/i=~""-4 200045 100045 -49.99%
BenchmarkPostingsForMatchers/Block/i!=""-4 200041 100041 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 100017 17 -99.98%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 100023 23 -99.98%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 200046 100046 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 200050 100050 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 200049 100049 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 111150 11150 -89.97%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 200055 100055 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 311238 111238 -64.26%
benchmark old bytes new bytes delta
BenchmarkPostingsForMatchers/Block/n="1"-4 296 296 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 424 424 +0.00%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 424 424 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 552 1544 +179.71%
BenchmarkPostingsForMatchers/Block/i=~".*"-4 1600482 1606125 +0.35%
BenchmarkPostingsForMatchers/Block/i=~".+"-4 17259065 17264709 +0.03%
BenchmarkPostingsForMatchers/Block/i=~""-4 17259150 17264780 +0.03%
BenchmarkPostingsForMatchers/Block/i!=""-4 17259048 17264680 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 1600610 1606242 +0.35%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 1600813 1606434 +0.35%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 17259176 17264808 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 17259304 17264936 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 17259333 17264965 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 3142628 3148262 +0.18%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 17259509 17265141 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 20405680 20416944 +0.06%
However overall Select performance is down and involves more allocs, due to
having to do more than a simple map lookup to resolve a symbol and that all the strings
returned are allocated:
benchmark old ns/op new ns/op delta
BenchmarkQuerierSelect/Block/1of1000000-4 506092636 862678244 +70.46%
BenchmarkQuerierSelect/Block/10of1000000-4 505638968 860917636 +70.26%
BenchmarkQuerierSelect/Block/100of1000000-4 505229450 882150048 +74.60%
BenchmarkQuerierSelect/Block/1000of1000000-4 515905414 862241115 +67.13%
BenchmarkQuerierSelect/Block/10000of1000000-4 516785354 874841110 +69.29%
BenchmarkQuerierSelect/Block/100000of1000000-4 540742808 907030187 +67.74%
BenchmarkQuerierSelect/Block/1000000of1000000-4 815224288 1181236903 +44.90%
benchmark old allocs new allocs delta
BenchmarkQuerierSelect/Block/1of1000000-4 4000020 6000020 +50.00%
BenchmarkQuerierSelect/Block/10of1000000-4 4000038 6000038 +50.00%
BenchmarkQuerierSelect/Block/100of1000000-4 4000218 6000218 +50.00%
BenchmarkQuerierSelect/Block/1000of1000000-4 4002018 6002018 +49.97%
BenchmarkQuerierSelect/Block/10000of1000000-4 4020018 6020018 +49.75%
BenchmarkQuerierSelect/Block/100000of1000000-4 4200018 6200018 +47.62%
BenchmarkQuerierSelect/Block/1000000of1000000-4 6000018 8000019 +33.33%
benchmark old bytes new bytes delta
BenchmarkQuerierSelect/Block/1of1000000-4 176001468 227201476 +29.09%
BenchmarkQuerierSelect/Block/10of1000000-4 176002620 227202628 +29.09%
BenchmarkQuerierSelect/Block/100of1000000-4 176014140 227214148 +29.09%
BenchmarkQuerierSelect/Block/1000of1000000-4 176129340 227329348 +29.07%
BenchmarkQuerierSelect/Block/10000of1000000-4 177281340 228481348 +28.88%
BenchmarkQuerierSelect/Block/100000of1000000-4 188801340 240001348 +27.12%
BenchmarkQuerierSelect/Block/1000000of1000000-4 304001340 355201616 +16.84%
Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2019-12-12 08:55:32 -08:00
q , err = NewBlockQuerier ( block , 0 , 1000 )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
require . Equal ( t , query ( t , q , labels . MustNewMatcher ( labels . MatchNotRegexp , "foo" , "^.?$" ) ) ,
2023-08-24 06:21:17 -07:00
map [ string ] [ ] chunks . Sample {
2023-03-30 10:50:13 -07:00
` { foo="bar"} ` : { sample { t : 1 , f : 2 } } ,
` { foo="baz"} ` : { sample { t : 3 , f : 4 } } ,
Load only some offsets into the symbol table into memory.
Rather than keeping the entire symbol table in memory, keep every nth
offset and walk from there to the entry we need. This ends up slightly
slower, ~360ms per 1M series returned from PostingsForMatchers which is
not much considering the rest of the CPU such a query would go on to
use.
Make LabelValues use the postings tables, rather than having
to do symbol lookups. Use yoloString, as PostingsForMatchers
doesn't need the strings to stick around and adjust the API
call to keep the Querier open until it's all marshalled.
Remove allocatedSymbols memory optimisation, we no longer keep all the
symbol strings in heap memory. Remove LabelValuesFor and LabelIndices,
they're dead code. Ensure we've still tests for label indices,
and add missing test that we can work with old V1 Format index files.
PostingForMatchers performance is slightly better, with a big drop in
allocation counts due to using yoloString for LabelValues:
benchmark old ns/op new ns/op delta
BenchmarkPostingsForMatchers/Block/n="1"-4 36698 36681 -0.05%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 522786 560887 +7.29%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 511652 537680 +5.09%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 522102 564239 +8.07%
BenchmarkPostingsForMatchers/Block/i=~".*"-4 113689911 111795919 -1.67%
BenchmarkPostingsForMatchers/Block/i=~".+"-4 135825572 132871085 -2.18%
BenchmarkPostingsForMatchers/Block/i=~""-4 40782628 38038181 -6.73%
BenchmarkPostingsForMatchers/Block/i!=""-4 31267869 29194327 -6.63%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 112733329 111568823 -1.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 112868153 111232029 -1.45%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 31338257 29349446 -6.35%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 32054482 29972436 -6.50%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 136504654 133968442 -1.86%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 27960350 27264997 -2.49%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 136765564 133860724 -2.12%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 163714583 159453668 -2.60%
benchmark old allocs new allocs delta
BenchmarkPostingsForMatchers/Block/n="1"-4 6 6 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 11 11 +0.00%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 11 11 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 17 15 -11.76%
BenchmarkPostingsForMatchers/Block/i=~".*"-4 100012 12 -99.99%
BenchmarkPostingsForMatchers/Block/i=~".+"-4 200040 100040 -49.99%
BenchmarkPostingsForMatchers/Block/i=~""-4 200045 100045 -49.99%
BenchmarkPostingsForMatchers/Block/i!=""-4 200041 100041 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 100017 17 -99.98%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 100023 23 -99.98%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 200046 100046 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 200050 100050 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 200049 100049 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 111150 11150 -89.97%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 200055 100055 -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 311238 111238 -64.26%
benchmark old bytes new bytes delta
BenchmarkPostingsForMatchers/Block/n="1"-4 296 296 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 424 424 +0.00%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 424 424 +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 552 1544 +179.71%
BenchmarkPostingsForMatchers/Block/i=~".*"-4 1600482 1606125 +0.35%
BenchmarkPostingsForMatchers/Block/i=~".+"-4 17259065 17264709 +0.03%
BenchmarkPostingsForMatchers/Block/i=~""-4 17259150 17264780 +0.03%
BenchmarkPostingsForMatchers/Block/i!=""-4 17259048 17264680 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 1600610 1606242 +0.35%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 1600813 1606434 +0.35%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 17259176 17264808 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 17259304 17264936 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 17259333 17264965 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 3142628 3148262 +0.18%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 17259509 17265141 +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 20405680 20416944 +0.06%
However overall Select performance is down and involves more allocs, due to
having to do more than a simple map lookup to resolve a symbol and that all the strings
returned are allocated:
benchmark old ns/op new ns/op delta
BenchmarkQuerierSelect/Block/1of1000000-4 506092636 862678244 +70.46%
BenchmarkQuerierSelect/Block/10of1000000-4 505638968 860917636 +70.26%
BenchmarkQuerierSelect/Block/100of1000000-4 505229450 882150048 +74.60%
BenchmarkQuerierSelect/Block/1000of1000000-4 515905414 862241115 +67.13%
BenchmarkQuerierSelect/Block/10000of1000000-4 516785354 874841110 +69.29%
BenchmarkQuerierSelect/Block/100000of1000000-4 540742808 907030187 +67.74%
BenchmarkQuerierSelect/Block/1000000of1000000-4 815224288 1181236903 +44.90%
benchmark old allocs new allocs delta
BenchmarkQuerierSelect/Block/1of1000000-4 4000020 6000020 +50.00%
BenchmarkQuerierSelect/Block/10of1000000-4 4000038 6000038 +50.00%
BenchmarkQuerierSelect/Block/100of1000000-4 4000218 6000218 +50.00%
BenchmarkQuerierSelect/Block/1000of1000000-4 4002018 6002018 +49.97%
BenchmarkQuerierSelect/Block/10000of1000000-4 4020018 6020018 +49.75%
BenchmarkQuerierSelect/Block/100000of1000000-4 4200018 6200018 +47.62%
BenchmarkQuerierSelect/Block/1000000of1000000-4 6000018 8000019 +33.33%
benchmark old bytes new bytes delta
BenchmarkQuerierSelect/Block/1of1000000-4 176001468 227201476 +29.09%
BenchmarkQuerierSelect/Block/10of1000000-4 176002620 227202628 +29.09%
BenchmarkQuerierSelect/Block/100of1000000-4 176014140 227214148 +29.09%
BenchmarkQuerierSelect/Block/1000of1000000-4 176129340 227329348 +29.07%
BenchmarkQuerierSelect/Block/10000of1000000-4 177281340 228481348 +28.88%
BenchmarkQuerierSelect/Block/100000of1000000-4 188801340 240001348 +27.12%
BenchmarkQuerierSelect/Block/1000000of1000000-4 304001340 355201616 +16.84%
Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2019-12-12 08:55:32 -08:00
} )
}
2021-02-09 09:38:35 -08:00
func BenchmarkLabelValuesWithMatchers ( b * testing . B ) {
2022-01-22 01:55:01 -08:00
tmpdir := b . TempDir ( )
2021-02-09 09:38:35 -08:00
var seriesEntries [ ] storage . Series
metricCount := 1000000
for i := 0 ; i < metricCount ; i ++ {
2022-05-04 14:41:36 -07:00
// Note these series are not created in sort order: 'value2' sorts after 'value10'.
// This makes a big difference to the benchmark timing.
2022-03-09 14:17:29 -08:00
seriesEntries = append ( seriesEntries , storage . NewListSeries ( labels . FromStrings (
"a_unique" , fmt . Sprintf ( "value%d" , i ) ,
"b_tens" , fmt . Sprintf ( "value%d" , i / ( metricCount / 10 ) ) ,
"c_ninety" , fmt . Sprintf ( "value%d" , i / ( metricCount / 10 ) / 9 ) , // "0" for the first 90%, then "1"
2023-08-24 06:21:17 -07:00
) , [ ] chunks . Sample { sample { 100 , 0 , nil , nil } } ) )
2021-02-09 09:38:35 -08:00
}
blockDir := createBlock ( b , tmpdir , seriesEntries )
files , err := sequenceFiles ( chunkDir ( blockDir ) )
require . NoError ( b , err )
require . Greater ( b , len ( files ) , 0 , "No chunk created." )
// Check open err.
block , err := OpenBlock ( nil , blockDir , nil )
require . NoError ( b , err )
defer func ( ) { require . NoError ( b , block . Close ( ) ) } ( )
indexReader , err := block . Index ( )
require . NoError ( b , err )
defer func ( ) { require . NoError ( b , indexReader . Close ( ) ) } ( )
2022-05-04 14:41:36 -07:00
matchers := [ ] * labels . Matcher { labels . MustNewMatcher ( labels . MatchEqual , "c_ninety" , "value0" ) }
2021-02-09 09:38:35 -08:00
b . ResetTimer ( )
b . ReportAllocs ( )
for benchIdx := 0 ; benchIdx < b . N ; benchIdx ++ {
2022-05-04 14:41:36 -07:00
actualValues , err := indexReader . LabelValues ( "b_tens" , matchers ... )
2021-02-09 09:38:35 -08:00
require . NoError ( b , err )
require . Equal ( b , 9 , len ( actualValues ) )
}
}
2021-07-20 05:38:08 -07:00
func TestLabelNamesWithMatchers ( t * testing . T ) {
2022-01-22 01:55:01 -08:00
tmpdir := t . TempDir ( )
2021-07-20 05:38:08 -07:00
var seriesEntries [ ] storage . Series
for i := 0 ; i < 100 ; i ++ {
2022-03-09 14:17:29 -08:00
seriesEntries = append ( seriesEntries , storage . NewListSeries ( labels . FromStrings (
"unique" , fmt . Sprintf ( "value%d" , i ) ,
2023-08-24 06:21:17 -07:00
) , [ ] chunks . Sample { sample { 100 , 0 , nil , nil } } ) )
2021-07-20 05:38:08 -07:00
if i % 10 == 0 {
2022-03-09 14:17:29 -08:00
seriesEntries = append ( seriesEntries , storage . NewListSeries ( labels . FromStrings (
"tens" , fmt . Sprintf ( "value%d" , i / 10 ) ,
"unique" , fmt . Sprintf ( "value%d" , i ) ,
2023-08-24 06:21:17 -07:00
) , [ ] chunks . Sample { sample { 100 , 0 , nil , nil } } ) )
2021-07-20 05:38:08 -07:00
}
if i % 20 == 0 {
2022-03-09 14:17:29 -08:00
seriesEntries = append ( seriesEntries , storage . NewListSeries ( labels . FromStrings (
"tens" , fmt . Sprintf ( "value%d" , i / 10 ) ,
"twenties" , fmt . Sprintf ( "value%d" , i / 20 ) ,
"unique" , fmt . Sprintf ( "value%d" , i ) ,
2023-08-24 06:21:17 -07:00
) , [ ] chunks . Sample { sample { 100 , 0 , nil , nil } } ) )
2021-07-20 05:38:08 -07:00
}
}
blockDir := createBlock ( t , tmpdir , seriesEntries )
files , err := sequenceFiles ( chunkDir ( blockDir ) )
require . NoError ( t , err )
require . Greater ( t , len ( files ) , 0 , "No chunk created." )
// Check open err.
block , err := OpenBlock ( nil , blockDir , nil )
require . NoError ( t , err )
t . Cleanup ( func ( ) { require . NoError ( t , block . Close ( ) ) } )
indexReader , err := block . Index ( )
require . NoError ( t , err )
t . Cleanup ( func ( ) { require . NoError ( t , indexReader . Close ( ) ) } )
testCases := [ ] struct {
name string
labelName string
matchers [ ] * labels . Matcher
expectedNames [ ] string
} {
{
name : "get with non-empty unique: all" ,
matchers : [ ] * labels . Matcher { labels . MustNewMatcher ( labels . MatchNotEqual , "unique" , "" ) } ,
expectedNames : [ ] string { "tens" , "twenties" , "unique" } ,
} , {
name : "get with unique ending in 1: only unique" ,
matchers : [ ] * labels . Matcher { labels . MustNewMatcher ( labels . MatchRegexp , "unique" , "value.*1" ) } ,
expectedNames : [ ] string { "unique" } ,
} , {
name : "get with unique = value20: all" ,
matchers : [ ] * labels . Matcher { labels . MustNewMatcher ( labels . MatchEqual , "unique" , "value20" ) } ,
expectedNames : [ ] string { "tens" , "twenties" , "unique" } ,
} , {
name : "get tens = 1: unique & tens" ,
matchers : [ ] * labels . Matcher { labels . MustNewMatcher ( labels . MatchEqual , "tens" , "value1" ) } ,
expectedNames : [ ] string { "tens" , "unique" } ,
} ,
}
for _ , tt := range testCases {
t . Run ( tt . name , func ( t * testing . T ) {
actualNames , err := indexReader . LabelNames ( tt . matchers ... )
require . NoError ( t , err )
require . Equal ( t , tt . expectedNames , actualNames )
} )
}
}
2019-01-28 03:24:49 -08:00
// createBlock creates a block with given set of series and returns its dir.
2020-02-06 07:58:38 -08:00
func createBlock ( tb testing . TB , dir string , series [ ] storage . Series ) string {
2020-10-12 09:04:20 -07:00
blockDir , err := CreateBlock ( series , dir , 0 , log . NewNopLogger ( ) )
2020-10-29 02:43:23 -07:00
require . NoError ( tb , err )
2020-10-12 09:04:20 -07:00
return blockDir
2019-12-05 23:47:31 -08:00
}
func createBlockFromHead ( tb testing . TB , dir string , head * Head ) string {
2021-05-18 09:38:37 -07:00
compactor , err := NewLeveledCompactor ( context . Background ( ) , nil , log . NewNopLogger ( ) , [ ] int64 { 1000000 } , nil , nil )
2020-10-29 02:43:23 -07:00
require . NoError ( tb , err )
2019-06-07 06:41:44 -07:00
2021-10-22 01:06:44 -07:00
require . NoError ( tb , os . MkdirAll ( dir , 0 o777 ) )
2019-06-07 06:41:44 -07:00
2019-07-03 03:47:31 -07:00
// Add +1 millisecond to block maxt because block intervals are half-open: [b.MinTime, b.MaxTime).
// Because of this block intervals are always +1 than the total samples it includes.
ulid , err := compactor . Write ( dir , head , head . MinTime ( ) , head . MaxTime ( ) + 1 , nil )
2020-10-29 02:43:23 -07:00
require . NoError ( tb , err )
2019-06-07 06:41:44 -07:00
return filepath . Join ( dir , ulid . String ( ) )
}
2022-10-10 08:08:46 -07:00
func createHead ( tb testing . TB , w * wlog . WL , series [ ] storage . Series , chunkDir string ) * Head {
2021-02-09 06:12:48 -08:00
opts := DefaultHeadOptions ( )
opts . ChunkDirRoot = chunkDir
2022-09-20 10:05:50 -07:00
head , err := NewHead ( nil , nil , w , nil , opts , nil )
2020-10-29 02:43:23 -07:00
require . NoError ( tb , err )
2018-10-23 14:35:52 -07:00
2022-09-20 10:16:45 -07:00
var it chunkenc . Iterator
2022-08-29 07:48:02 -07:00
ctx := context . Background ( )
app := head . Appender ( ctx )
2019-01-28 03:24:49 -08:00
for _ , s := range series {
2021-11-06 03:10:04 -07:00
ref := storage . SeriesRef ( 0 )
2022-09-20 10:16:45 -07:00
it = s . Iterator ( it )
2021-02-18 04:07:00 -08:00
lset := s . Labels ( )
2022-08-29 07:48:02 -07:00
typ := it . Next ( )
lastTyp := typ
for ; typ != chunkenc . ValNone ; typ = it . Next ( ) {
if lastTyp != typ {
// The behaviour of appender is undefined if samples of different types
// are appended to the same series in a single Commit().
require . NoError ( tb , app . Commit ( ) )
app = head . Appender ( ctx )
}
switch typ {
case chunkenc . ValFloat :
t , v := it . At ( )
ref , err = app . Append ( ref , lset , t , v )
case chunkenc . ValHistogram :
t , h := it . AtHistogram ( )
2022-12-28 00:55:07 -08:00
ref , err = app . AppendHistogram ( ref , lset , t , h , nil )
case chunkenc . ValFloatHistogram :
t , fh := it . AtFloatHistogram ( )
ref , err = app . AppendHistogram ( ref , lset , t , nil , fh )
2022-08-29 07:48:02 -07:00
default :
err = fmt . Errorf ( "unknown sample type %s" , typ . String ( ) )
}
2020-10-29 02:43:23 -07:00
require . NoError ( tb , err )
2022-08-29 07:48:02 -07:00
lastTyp = typ
2018-10-23 14:35:52 -07:00
}
2020-10-29 02:43:23 -07:00
require . NoError ( tb , it . Err ( ) )
2018-10-23 14:35:52 -07:00
}
2020-10-29 02:43:23 -07:00
require . NoError ( tb , app . Commit ( ) )
2019-06-07 06:41:44 -07:00
return head
2018-10-23 14:35:52 -07:00
}
2019-01-28 03:24:49 -08:00
2022-10-10 08:08:46 -07:00
func createHeadWithOOOSamples ( tb testing . TB , w * wlog . WL , series [ ] storage . Series , chunkDir string , oooSampleFrequency int ) * Head {
2022-09-20 10:05:50 -07:00
opts := DefaultHeadOptions ( )
opts . ChunkDirRoot = chunkDir
opts . OutOfOrderTimeWindow . Store ( 10000000000 )
head , err := NewHead ( nil , nil , w , nil , opts , nil )
require . NoError ( tb , err )
oooSampleLabels := make ( [ ] labels . Labels , 0 , len ( series ) )
2023-08-24 06:21:17 -07:00
oooSamples := make ( [ ] chunks . SampleSlice , 0 , len ( series ) )
2022-09-20 10:05:50 -07:00
2022-09-20 10:16:45 -07:00
var it chunkenc . Iterator
2022-09-20 10:05:50 -07:00
totalSamples := 0
app := head . Appender ( context . Background ( ) )
for _ , s := range series {
ref := storage . SeriesRef ( 0 )
2022-09-20 10:16:45 -07:00
it = s . Iterator ( it )
2022-09-20 10:05:50 -07:00
lset := s . Labels ( )
2023-08-24 06:21:17 -07:00
os := chunks . SampleSlice { }
2022-09-20 10:05:50 -07:00
count := 0
2022-10-05 13:14:49 -07:00
for it . Next ( ) == chunkenc . ValFloat {
2022-09-20 10:05:50 -07:00
totalSamples ++
count ++
t , v := it . At ( )
if count % oooSampleFrequency == 0 {
2023-03-30 10:50:13 -07:00
os = append ( os , sample { t : t , f : v } )
2022-09-20 10:05:50 -07:00
continue
}
ref , err = app . Append ( ref , lset , t , v )
require . NoError ( tb , err )
}
require . NoError ( tb , it . Err ( ) )
if len ( os ) > 0 {
oooSampleLabels = append ( oooSampleLabels , lset )
oooSamples = append ( oooSamples , os )
}
}
require . NoError ( tb , app . Commit ( ) )
oooSamplesAppended := 0
require . Equal ( tb , float64 ( 0 ) , prom_testutil . ToFloat64 ( head . metrics . outOfOrderSamplesAppended ) )
app = head . Appender ( context . Background ( ) )
for i , lset := range oooSampleLabels {
ref := storage . SeriesRef ( 0 )
for _ , sample := range oooSamples [ i ] {
2023-03-30 10:50:13 -07:00
ref , err = app . Append ( ref , lset , sample . T ( ) , sample . F ( ) )
2022-09-20 10:05:50 -07:00
require . NoError ( tb , err )
oooSamplesAppended ++
}
}
require . NoError ( tb , app . Commit ( ) )
actOOOAppended := prom_testutil . ToFloat64 ( head . metrics . outOfOrderSamplesAppended )
require . GreaterOrEqual ( tb , actOOOAppended , float64 ( oooSamplesAppended - len ( series ) ) )
require . LessOrEqual ( tb , actOOOAppended , float64 ( oooSamplesAppended ) )
require . Equal ( tb , float64 ( totalSamples ) , prom_testutil . ToFloat64 ( head . metrics . samplesAppended ) )
return head
}
2019-05-27 04:24:46 -07:00
// Building blocks for the label sets synthesised by
// genSeriesFromSampleGenerator.
const (
// defaultLabelName is the name of the label that carries the series index
// and the prefix of every additional generated label name.
defaultLabelName = "labelName"
// defaultLabelValue is the prefix of every additional generated label value.
defaultLabelValue = "labelValue"
)
2022-08-29 03:05:03 -07:00
// genSeries generates series of float64 samples with a given number of labels and values.
2020-02-06 07:58:38 -08:00
func genSeries ( totalSeries , labelCount int , mint , maxt int64 ) [ ] storage . Series {
2023-08-24 06:21:17 -07:00
return genSeriesFromSampleGenerator ( totalSeries , labelCount , mint , maxt , 1 , func ( ts int64 ) chunks . Sample {
2023-03-30 10:50:13 -07:00
return sample { t : ts , f : rand . Float64 ( ) }
2022-08-29 03:05:03 -07:00
} )
}
// genHistogramSeries generates series of histogram samples with a given number of labels and values.
2022-12-28 00:55:07 -08:00
func genHistogramSeries ( totalSeries , labelCount int , mint , maxt , step int64 , floatHistogram bool ) [ ] storage . Series {
2023-08-24 06:21:17 -07:00
return genSeriesFromSampleGenerator ( totalSeries , labelCount , mint , maxt , step , func ( ts int64 ) chunks . Sample {
2022-08-29 03:05:03 -07:00
h := & histogram . Histogram {
2023-08-22 12:51:56 -07:00
Count : 7 + uint64 ( ts * 5 ) ,
2022-08-29 03:05:03 -07:00
ZeroCount : 2 + uint64 ( ts ) ,
ZeroThreshold : 0.001 ,
Sum : 18.4 * rand . Float64 ( ) ,
Schema : 1 ,
PositiveSpans : [ ] histogram . Span {
{ Offset : 0 , Length : 2 } ,
{ Offset : 1 , Length : 2 } ,
} ,
2023-04-09 00:08:40 -07:00
PositiveBuckets : [ ] int64 { ts + 1 , 1 , - 1 , 0 } ,
2022-08-29 03:05:03 -07:00
}
2023-01-18 08:59:29 -08:00
if ts != mint {
// By setting the counter reset hint to "no counter
// reset" for all histograms but the first, we cover the
// most common cases. If the series is manipulated later
// or spans more than one block when ingested into the
// storage, the hint has to be adjusted. Note that the
// storage itself treats this particular hint the same
// as "unknown".
h . CounterResetHint = histogram . NotCounterReset
}
2022-12-28 00:55:07 -08:00
if floatHistogram {
return sample { t : ts , fh : h . ToFloat ( ) }
}
2022-08-29 03:05:03 -07:00
return sample { t : ts , h : h }
} )
}
// genHistogramAndFloatSeries generates series of mixed histogram and float64 samples with a given number of labels and values.
2022-12-28 00:55:07 -08:00
func genHistogramAndFloatSeries ( totalSeries , labelCount int , mint , maxt , step int64 , floatHistogram bool ) [ ] storage . Series {
2022-08-29 03:05:03 -07:00
floatSample := false
count := 0
2023-08-24 06:21:17 -07:00
return genSeriesFromSampleGenerator ( totalSeries , labelCount , mint , maxt , step , func ( ts int64 ) chunks . Sample {
2022-08-29 03:05:03 -07:00
count ++
var s sample
if floatSample {
2023-03-30 10:50:13 -07:00
s = sample { t : ts , f : rand . Float64 ( ) }
2022-08-29 03:05:03 -07:00
} else {
h := & histogram . Histogram {
2023-08-22 12:51:56 -07:00
Count : 7 + uint64 ( ts * 5 ) ,
2022-08-29 03:05:03 -07:00
ZeroCount : 2 + uint64 ( ts ) ,
ZeroThreshold : 0.001 ,
Sum : 18.4 * rand . Float64 ( ) ,
Schema : 1 ,
PositiveSpans : [ ] histogram . Span {
{ Offset : 0 , Length : 2 } ,
{ Offset : 1 , Length : 2 } ,
} ,
2023-04-09 00:08:40 -07:00
PositiveBuckets : [ ] int64 { ts + 1 , 1 , - 1 , 0 } ,
2022-08-29 03:05:03 -07:00
}
2023-01-18 08:59:29 -08:00
if count > 1 && count % 5 != 1 {
// Same rationale for this as above in
// genHistogramSeries, just that we have to be
// smarter to find out if the previous sample
// was a histogram, too.
h . CounterResetHint = histogram . NotCounterReset
}
2022-12-28 00:55:07 -08:00
if floatHistogram {
s = sample { t : ts , fh : h . ToFloat ( ) }
} else {
s = sample { t : ts , h : h }
}
2022-08-29 03:05:03 -07:00
}
if count % 5 == 0 {
// Flip the sample type for every 5 samples.
floatSample = ! floatSample
}
return s
} )
}
2023-08-24 06:21:17 -07:00
func genSeriesFromSampleGenerator ( totalSeries , labelCount int , mint , maxt , step int64 , generator func ( ts int64 ) chunks . Sample ) [ ] storage . Series {
2019-01-28 03:24:49 -08:00
if totalSeries == 0 || labelCount == 0 {
return nil
}
2020-02-06 07:58:38 -08:00
series := make ( [ ] storage . Series , totalSeries )
2019-06-07 06:41:44 -07:00
2019-01-28 03:24:49 -08:00
for i := 0 ; i < totalSeries ; i ++ {
lbls := make ( map [ string ] string , labelCount )
2019-05-27 04:24:46 -07:00
lbls [ defaultLabelName ] = strconv . Itoa ( i )
for j := 1 ; len ( lbls ) < labelCount ; j ++ {
lbls [ defaultLabelName + strconv . Itoa ( j ) ] = defaultLabelValue + strconv . Itoa ( j )
2019-01-28 03:24:49 -08:00
}
2023-08-24 06:21:17 -07:00
samples := make ( [ ] chunks . Sample , 0 , ( maxt - mint ) / step + 1 )
2022-08-29 03:05:03 -07:00
for t := mint ; t < maxt ; t += step {
samples = append ( samples , generator ( t ) )
2019-01-28 03:24:49 -08:00
}
2020-07-31 08:03:02 -07:00
series [ i ] = storage . NewListSeries ( labels . FromMap ( lbls ) , samples )
2019-01-28 03:24:49 -08:00
}
2019-02-14 05:29:41 -08:00
return series
}
// populateSeries generates series from given labels, mint and maxt.
2020-02-06 07:58:38 -08:00
func populateSeries ( lbls [ ] map [ string ] string , mint , maxt int64 ) [ ] storage . Series {
2019-02-14 05:29:41 -08:00
if len ( lbls ) == 0 {
return nil
}
2019-01-28 03:24:49 -08:00
2020-02-06 07:58:38 -08:00
series := make ( [ ] storage . Series , 0 , len ( lbls ) )
2019-02-14 05:29:41 -08:00
for _ , lbl := range lbls {
if len ( lbl ) == 0 {
continue
}
2023-08-24 06:21:17 -07:00
samples := make ( [ ] chunks . Sample , 0 , maxt - mint + 1 )
2019-02-14 05:29:41 -08:00
for t := mint ; t <= maxt ; t ++ {
2023-03-30 10:50:13 -07:00
samples = append ( samples , sample { t : t , f : rand . Float64 ( ) } )
2019-02-14 05:29:41 -08:00
}
2020-07-31 08:03:02 -07:00
series = append ( series , storage . NewListSeries ( labels . FromMap ( lbl ) , samples ) )
2019-02-14 05:29:41 -08:00
}
2019-01-28 03:24:49 -08:00
return series
}