Check of duplicated samples directly in scrapeCache.get()

Avoid extra map lookup by hooking check into cache get.

```
goos: linux
goarch: amd64
pkg: github.com/prometheus/prometheus/scrape
cpu: Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz
                     │  main.txt   │              new.txt               │
                     │   sec/op    │   sec/op     vs base               │
ScrapeLoopAppend-8     66.72µ ± 0%   66.89µ ± 0%       ~ (p=0.879 n=50)
ScrapeLoopAppendOM-8   66.61µ ± 0%   66.89µ ± 1%       ~ (p=0.115 n=50)
geomean                66.66µ        66.89µ       +0.34%

                     │   main.txt    │               new.txt                │
                     │     B/op      │     B/op      vs base                │
ScrapeLoopAppend-8     20.17Ki ±  1%   20.12Ki ± 1%        ~ (p=0.343 n=50)
ScrapeLoopAppendOM-8   20.38Ki ± 10%   17.99Ki ± 2%  -11.69% (p=0.017 n=50)
geomean                20.27Ki         19.03Ki        -6.14%

                     │  main.txt  │               new.txt               │
                     │ allocs/op  │ allocs/op   vs base                 │
ScrapeLoopAppend-8     11.00 ± 0%   11.00 ± 0%       ~ (p=1.000 n=50) ¹
ScrapeLoopAppendOM-8   12.00 ± 0%   12.00 ± 0%       ~ (p=1.000 n=50) ¹
geomean                11.49        11.49       +0.00%
¹ all samples are equal
```

Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
This commit is contained in:
Łukasz Mierzwa 2024-02-27 12:09:32 +00:00
parent 21f8b35f5b
commit c013a3c1b5

View file

@ -954,13 +954,14 @@ func (c *scrapeCache) iterDone(flushCache bool) {
}
}
func (c *scrapeCache) get(met []byte) (*cacheEntry, bool) {
func (c *scrapeCache) get(met []byte) (*cacheEntry, bool, bool) {
e, ok := c.series[string(met)]
if !ok {
return nil, false
return nil, false, false
}
alreadyScraped := e.lastIter == c.iter
e.lastIter = c.iter
return e, true
return e, true, alreadyScraped
}
func (c *scrapeCache) addRef(met []byte, ref storage.SeriesRef, lset labels.Labels, hash uint64) {
@ -1512,13 +1513,13 @@ func (sl *scrapeLoop) append(app storage.Appender, b []byte, contentType string,
loop:
for {
var (
et textparse.Entry
sampleAdded, isHistogram, seriesAlreadyScraped bool
met []byte
parsedTimestamp *int64
val float64
h *histogram.Histogram
fh *histogram.FloatHistogram
et textparse.Entry
sampleAdded, isHistogram bool
met []byte
parsedTimestamp *int64
val float64
h *histogram.Histogram
fh *histogram.FloatHistogram
)
if et, err = p.Next(); err != nil {
if errors.Is(err, io.EOF) {
@ -1564,7 +1565,7 @@ loop:
if sl.cache.getDropped(met) {
continue
}
ce, ok := sl.cache.get(met)
ce, ok, seriesAlreadyScraped := sl.cache.get(met)
var (
ref storage.SeriesRef
hash uint64
@ -1610,7 +1611,6 @@ loop:
updateMetadata(lset, true)
}
_, seriesAlreadyScraped = sl.cache.seriesCur[hash]
if seriesAlreadyScraped {
err = storage.ErrDuplicateSampleForTimestamp
} else {
@ -1882,7 +1882,7 @@ func (sl *scrapeLoop) reportStale(app storage.Appender, start time.Time) (err er
}
func (sl *scrapeLoop) addReportSample(app storage.Appender, s []byte, t int64, v float64, b *labels.Builder) error {
ce, ok := sl.cache.get(s)
ce, ok, _ := sl.cache.get(s)
var ref storage.SeriesRef
var lset labels.Labels
if ok {