Check for duplicated samples directly in scrapeCache.get()

Avoid an extra map lookup by hooking the check into the cache get.

```
goos: linux
goarch: amd64
pkg: github.com/prometheus/prometheus/scrape
cpu: Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz
                     │  main.txt   │              new.txt               │
                     │   sec/op    │   sec/op     vs base               │
ScrapeLoopAppend-8     66.72µ ± 0%   66.89µ ± 0%       ~ (p=0.879 n=50)
ScrapeLoopAppendOM-8   66.61µ ± 0%   66.89µ ± 1%       ~ (p=0.115 n=50)
geomean                66.66µ        66.89µ       +0.34%

                     │   main.txt    │               new.txt                │
                     │     B/op      │     B/op      vs base                │
ScrapeLoopAppend-8     20.17Ki ±  1%   20.12Ki ± 1%        ~ (p=0.343 n=50)
ScrapeLoopAppendOM-8   20.38Ki ± 10%   17.99Ki ± 2%  -11.69% (p=0.017 n=50)
geomean                20.27Ki         19.03Ki        -6.14%

                     │  main.txt  │               new.txt               │
                     │ allocs/op  │ allocs/op   vs base                 │
ScrapeLoopAppend-8     11.00 ± 0%   11.00 ± 0%       ~ (p=1.000 n=50) ¹
ScrapeLoopAppendOM-8   12.00 ± 0%   12.00 ± 0%       ~ (p=1.000 n=50) ¹
geomean                11.49        11.49       +0.00%
¹ all samples are equal
```

Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
This commit is contained in:
Łukasz Mierzwa 2024-02-27 12:09:32 +00:00
parent 21f8b35f5b
commit c013a3c1b5

View file

@ -954,13 +954,14 @@ func (c *scrapeCache) iterDone(flushCache bool) {
} }
} }
func (c *scrapeCache) get(met []byte) (*cacheEntry, bool) { func (c *scrapeCache) get(met []byte) (*cacheEntry, bool, bool) {
e, ok := c.series[string(met)] e, ok := c.series[string(met)]
if !ok { if !ok {
return nil, false return nil, false, false
} }
alreadyScraped := e.lastIter == c.iter
e.lastIter = c.iter e.lastIter = c.iter
return e, true return e, true, alreadyScraped
} }
func (c *scrapeCache) addRef(met []byte, ref storage.SeriesRef, lset labels.Labels, hash uint64) { func (c *scrapeCache) addRef(met []byte, ref storage.SeriesRef, lset labels.Labels, hash uint64) {
@ -1512,13 +1513,13 @@ func (sl *scrapeLoop) append(app storage.Appender, b []byte, contentType string,
loop: loop:
for { for {
var ( var (
et textparse.Entry et textparse.Entry
sampleAdded, isHistogram, seriesAlreadyScraped bool sampleAdded, isHistogram bool
met []byte met []byte
parsedTimestamp *int64 parsedTimestamp *int64
val float64 val float64
h *histogram.Histogram h *histogram.Histogram
fh *histogram.FloatHistogram fh *histogram.FloatHistogram
) )
if et, err = p.Next(); err != nil { if et, err = p.Next(); err != nil {
if errors.Is(err, io.EOF) { if errors.Is(err, io.EOF) {
@ -1564,7 +1565,7 @@ loop:
if sl.cache.getDropped(met) { if sl.cache.getDropped(met) {
continue continue
} }
ce, ok := sl.cache.get(met) ce, ok, seriesAlreadyScraped := sl.cache.get(met)
var ( var (
ref storage.SeriesRef ref storage.SeriesRef
hash uint64 hash uint64
@ -1610,7 +1611,6 @@ loop:
updateMetadata(lset, true) updateMetadata(lset, true)
} }
_, seriesAlreadyScraped = sl.cache.seriesCur[hash]
if seriesAlreadyScraped { if seriesAlreadyScraped {
err = storage.ErrDuplicateSampleForTimestamp err = storage.ErrDuplicateSampleForTimestamp
} else { } else {
@ -1882,7 +1882,7 @@ func (sl *scrapeLoop) reportStale(app storage.Appender, start time.Time) (err er
} }
func (sl *scrapeLoop) addReportSample(app storage.Appender, s []byte, t int64, v float64, b *labels.Builder) error { func (sl *scrapeLoop) addReportSample(app storage.Appender, s []byte, t int64, v float64, b *labels.Builder) error {
ce, ok := sl.cache.get(s) ce, ok, _ := sl.cache.get(s)
var ref storage.SeriesRef var ref storage.SeriesRef
var lset labels.Labels var lset labels.Labels
if ok { if ok {