fix(nhcb): do not return nhcb from parse if exponential is present (#15209)

From: https://github.com/prometheus/prometheus/pull/14978#discussion_r1800755481
Also encode the requirement table set in #13532

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
George Krajcsovits 2024-10-24 18:14:05 +02:00 committed by GitHub
parent 2182b83271
commit 469573b13b
2 changed files with 286 additions and 104 deletions
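
For orientation, here is a minimal sketch (not part of the commit) of how the two parsers are chained, mirroring the wiring used by the test added below. The function name newScrapeParser, the package name, and the parameter names are hypothetical; only NewProtobufParser, NewNHCBParser and labels.NewSymbolTable come from the code in this diff.

package example

import (
    "github.com/prometheus/prometheus/model/labels"
    "github.com/prometheus/prometheus/model/textparse"
)

// newScrapeParser wraps the protobuf parser in the NHCB parser, the same way
// the test below does. With convertToNHCB set, classic histogram series are
// collated into a native histogram with custom buckets (NHCB); after this
// commit that conversion is skipped for series that already exposed an
// exponential (native) histogram.
func newScrapeParser(scrapeBody []byte, alwaysScrapeClassic, convertToNHCB bool) textparse.Parser {
    p := textparse.NewProtobufParser(scrapeBody, alwaysScrapeClassic, labels.NewSymbolTable())
    if convertToNHCB {
        p = textparse.NewNHCBParser(p, labels.NewSymbolTable(), alwaysScrapeClassic)
    }
    return p
}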

model/textparse/nhcbparse.go

@@ -100,6 +100,8 @@ type NHCBParser struct {
   // is part of the same classic histogram.
   lastHistogramName string
   lastHistogramLabelsHash uint64
+  lastHistogramExponential bool
+  // Reused buffer for hashing labels.
   hBuffer []byte
 }
@@ -199,10 +201,21 @@ func (p *NHCBParser) Next() (Entry, error) {
     p.bytes, p.ts, p.value = p.parser.Series()
     p.metricString = p.parser.Metric(&p.lset)
     // Check the label set to see if we can continue or need to emit the NHCB.
-    if p.compareLabels() && p.processNHCB() {
+    var isNHCB bool
+    if p.compareLabels() {
+      // Labels differ. Check if we can emit the NHCB.
+      if p.processNHCB() {
         return EntryHistogram, nil
       }
-    isNHCB := p.handleClassicHistogramSeries(p.lset)
+      isNHCB = p.handleClassicHistogramSeries(p.lset)
+    } else {
+      // Labels are the same. Check if after an exponential histogram.
+      if p.lastHistogramExponential {
+        isNHCB = false
+      } else {
+        isNHCB = p.handleClassicHistogramSeries(p.lset)
+      }
+    }
     if isNHCB && !p.keepClassicHistograms {
       // Do not return the classic histogram series if it was converted to NHCB and we are not keeping classic histograms.
       return p.Next()
@@ -211,6 +224,7 @@ func (p *NHCBParser) Next() (Entry, error) {
   case EntryHistogram:
     p.bytes, p.ts, p.h, p.fh = p.parser.Histogram()
     p.metricString = p.parser.Metric(&p.lset)
+    p.storeExponentialLabels()
   case EntryType:
     p.bName, p.typ = p.parser.Type()
   }
@@ -239,9 +253,16 @@ func (p *NHCBParser) compareLabels() bool {
 }
 
 // Save the label set of the classic histogram without suffix and bucket `le` label.
-func (p *NHCBParser) storeBaseLabels() {
+func (p *NHCBParser) storeClassicLabels() {
   p.lastHistogramName = convertnhcb.GetHistogramMetricBaseName(p.lset.Get(labels.MetricName))
   p.lastHistogramLabelsHash, _ = p.lset.HashWithoutLabels(p.hBuffer, labels.BucketLabel)
+  p.lastHistogramExponential = false
+}
+
+func (p *NHCBParser) storeExponentialLabels() {
+  p.lastHistogramName = p.lset.Get(labels.MetricName)
+  p.lastHistogramLabelsHash, _ = p.lset.HashWithoutLabels(p.hBuffer)
+  p.lastHistogramExponential = true
 }
 
 // handleClassicHistogramSeries collates the classic histogram series to be converted to NHCB
@@ -282,7 +303,7 @@ func (p *NHCBParser) handleClassicHistogramSeries(lset labels.Labels) bool {
 func (p *NHCBParser) processClassicHistogramSeries(lset labels.Labels, suffix string, updateHist func(*convertnhcb.TempHistogram)) {
   if p.state != stateCollecting {
-    p.storeBaseLabels()
+    p.storeClassicLabels()
     p.tempCT = p.parser.CreatedTimestamp()
     p.state = stateCollecting
   }
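
To make the effect of the parser change concrete, a small hypothetical driver loop built only on the exported textparse.Parser interface is sketched below; it reports whether each sample comes out as a float series (classic) or a histogram sample (exponential or NHCB), which is the distinction the requirement tables in the test file below encode. dumpEntries is not part of the commit.

import (
    "errors"
    "fmt"
    "io"

    "github.com/prometheus/prometheus/model/labels"
    "github.com/prometheus/prometheus/model/textparse"
)

// dumpEntries drains a parser and reports the kind of each sample. With this
// fix, a family exposing both classic buckets and an exponential histogram
// yields the exponential histogram (plus the classic float series when they
// are kept), but no converted NHCB.
func dumpEntries(p textparse.Parser) error {
    for {
        entry, err := p.Next()
        if errors.Is(err, io.EOF) {
            return nil
        }
        if err != nil {
            return err
        }
        var lset labels.Labels
        switch entry {
        case textparse.EntrySeries:
            p.Metric(&lset)
            _, _, v := p.Series()
            fmt.Printf("float sample %s = %g\n", lset, v)
        case textparse.EntryHistogram:
            p.Metric(&lset)
            _, _, h, fh := p.Histogram()
            fmt.Printf("histogram sample %s (integer: %t, float: %t)\n", lset, h != nil, fh != nil)
        }
    }
}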

model/textparse/nhcbparse_test.go

@@ -16,6 +16,7 @@ package textparse
 import (
   "bytes"
   "encoding/binary"
+  "strconv"
   "testing"
 
   "github.com/gogo/protobuf/proto"
@@ -493,7 +494,6 @@ something_bucket{a="b",le="+Inf"} 9 # {id="something-test"} 2e100 123.000
       {Labels: labels.FromStrings("id", "something-test"), Value: 0.5},
       {Labels: labels.FromStrings("id", "something-test"), Value: 8.0},
     },
-    // TODO(krajorama): ct: int64p(1520430001000),
   }, {
     m: `something{a="b"}`,
     shs: &histogram.Histogram{
@@ -509,7 +509,6 @@ something_bucket{a="b",le="+Inf"} 9 # {id="something-test"} 2e100 123.000
       {Labels: labels.FromStrings("id", "something-test"), Value: 0.0, HasTs: true, Ts: 123321},
       {Labels: labels.FromStrings("id", "something-test"), Value: 2e100, HasTs: true, Ts: 123000},
     },
-    // TODO(krajorama): ct: int64p(1520430002000),
   },
 }
@@ -520,28 +519,100 @@ something_bucket{a="b",le="+Inf"} 9 # {id="something-test"} 2e100 123.000
   requireEntries(t, exp, got)
 }
 
-// Verify that the NHCBParser does not parse the NHCB when the exponential is present.
+// Verify the requirement tables from
+// https://github.com/prometheus/prometheus/issues/13532 .
+// "classic" means the option "always_scrape_classic_histograms".
+// "nhcb" means the option "convert_classic_histograms_to_nhcb".
+//
+// Currently only with the ProtoBuf parser that supports exponential
+// histograms.
+//
+// Case 1. Only classic histogram is exposed.
+//
+// | Scrape Config             | Expect classic | Expect exponential | Expect NHCB |.
+// | classic=false, nhcb=false | YES            | NO                 | NO          |.
+// | classic=true, nhcb=false  | YES            | NO                 | NO          |.
+// | classic=false, nhcb=true  | NO             | NO                 | YES         |.
+// | classic=true, nhcb=true   | YES            | NO                 | YES         |.
+//
+// Case 2. Both classic and exponential histograms are exposed.
+//
+// | Scrape Config             | Expect classic | Expect exponential | Expect NHCB |.
+// | classic=false, nhcb=false | NO             | YES                | NO          |.
+// | classic=true, nhcb=false  | YES            | YES                | NO          |.
+// | classic=false, nhcb=true  | NO             | YES                | NO          |.
+// | classic=true, nhcb=true   | YES            | YES                | NO          |.
+//
+// Case 3. Only exponential histogram is exposed.
+//
+// | Scrape Config             | Expect classic | Expect exponential | Expect NHCB |.
+// | classic=false, nhcb=false | NO             | YES                | NO          |.
+// | classic=true, nhcb=false  | NO             | YES                | NO          |.
+// | classic=false, nhcb=true  | NO             | YES                | NO          |.
+// | classic=true, nhcb=true   | NO             | YES                | NO          |.
 func TestNHCBParserProtoBufParser_NoNHCBWhenExponential(t *testing.T) {
-  inputBuf := createTestProtoBufHistogram(t)
-  // Initialize the protobuf parser so that it returns classic histograms as
-  // well when there's both classic and exponential histograms.
-  p := NewProtobufParser(inputBuf.Bytes(), true, labels.NewSymbolTable())
-  // Initialize the NHCBParser so that it returns classic histograms as well
-  // when there's both classic and exponential histograms.
-  p = NewNHCBParser(p, labels.NewSymbolTable(), true)
-  exp := []parsedEntry{
+  type requirement struct {
+    expectClassic bool
+    expectExponential bool
+    expectNHCB bool
+  }
+  cases := []map[string]requirement{
+    // Case 1.
     {
-      m: "test_histogram",
-      help: "Test histogram with classic and exponential buckets.",
+      "classic=false, nhcb=false": {expectClassic: true, expectExponential: false, expectNHCB: false},
+      "classic=true, nhcb=false": {expectClassic: true, expectExponential: false, expectNHCB: false},
+      "classic=false, nhcb=true": {expectClassic: false, expectExponential: false, expectNHCB: true},
+      "classic=true, nhcb=true": {expectClassic: true, expectExponential: false, expectNHCB: true},
     },
+    // Case 2.
     {
-      m: "test_histogram",
+      "classic=false, nhcb=false": {expectClassic: false, expectExponential: true, expectNHCB: false},
+      "classic=true, nhcb=false": {expectClassic: true, expectExponential: true, expectNHCB: false},
+      "classic=false, nhcb=true": {expectClassic: false, expectExponential: true, expectNHCB: false},
+      "classic=true, nhcb=true": {expectClassic: true, expectExponential: true, expectNHCB: false},
+    },
+    // Case 3.
+    {
+      "classic=false, nhcb=false": {expectClassic: false, expectExponential: true, expectNHCB: false},
+      "classic=true, nhcb=false": {expectClassic: false, expectExponential: true, expectNHCB: false},
+      "classic=false, nhcb=true": {expectClassic: false, expectExponential: true, expectNHCB: false},
+      "classic=true, nhcb=true": {expectClassic: false, expectExponential: true, expectNHCB: false},
+    },
+  }
+  type testCase struct {
+    name string
+    classic bool
+    nhcb bool
+    exp []parsedEntry
+  }
+  testCases := []testCase{}
+  for _, classic := range []bool{false, true} {
+    for _, nhcb := range []bool{false, true} {
+      tc := testCase{
+        name: "classic=" + strconv.FormatBool(classic) + ", nhcb=" + strconv.FormatBool(nhcb),
+        classic: classic,
+        nhcb: nhcb,
+        exp: []parsedEntry{},
+      }
+      for i, caseI := range cases {
+        req := caseI[tc.name]
+        metric := "test_histogram" + strconv.Itoa(i+1)
+        tc.exp = append(tc.exp, parsedEntry{
+          m: metric,
+          help: "Test histogram " + strconv.Itoa(i+1),
+        })
+        tc.exp = append(tc.exp, parsedEntry{
+          m: metric,
           typ: model.MetricTypeHistogram,
-    },
+        })
+        if req.expectExponential {
+          // Always expect exponential histogram first.
+          exponentialSeries := []parsedEntry{
            {
-      m: "test_histogram",
+              m: metric,
              shs: &histogram.Histogram{
                Schema: 3,
                Count: 175,
@@ -553,59 +624,66 @@ func TestNHCBParserProtoBufParser_NoNHCBWhenExponential(t *testing.T) {
                PositiveBuckets: []int64{1, 2, -1, -1},
                NegativeBuckets: []int64{1, 3, -2, -1, 1},
              },
-      lset: labels.FromStrings("__name__", "test_histogram"),
+              lset: labels.FromStrings("__name__", metric),
              t: int64p(1234568),
              ct: int64p(1000),
            },
+          }
+          tc.exp = append(tc.exp, exponentialSeries...)
+        }
+        if req.expectClassic {
+          // Always expect classic histogram series after exponential.
+          classicSeries := []parsedEntry{
            {
-      m: "test_histogram_count",
+              m: metric + "_count",
              v: 175,
-      lset: labels.FromStrings("__name__", "test_histogram_count"),
+              lset: labels.FromStrings("__name__", metric+"_count"),
              t: int64p(1234568),
              ct: int64p(1000),
            },
            {
-      m: "test_histogram_sum",
+              m: metric + "_sum",
              v: 0.0008280461746287094,
-      lset: labels.FromStrings("__name__", "test_histogram_sum"),
+              lset: labels.FromStrings("__name__", metric+"_sum"),
              t: int64p(1234568),
              ct: int64p(1000),
            },
            {
-      m: "test_histogram_bucket\xffle\xff-0.0004899999999999998",
+              m: metric + "_bucket\xffle\xff-0.0004899999999999998",
              v: 2,
-      lset: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0004899999999999998"),
+              lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0004899999999999998"),
              t: int64p(1234568),
              ct: int64p(1000),
            },
            {
-      m: "test_histogram_bucket\xffle\xff-0.0003899999999999998",
+              m: metric + "_bucket\xffle\xff-0.0003899999999999998",
              v: 4,
-      lset: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0003899999999999998"),
+              lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0003899999999999998"),
              t: int64p(1234568),
              ct: int64p(1000),
            },
            {
-      m: "test_histogram_bucket\xffle\xff-0.0002899999999999998",
+              m: metric + "_bucket\xffle\xff-0.0002899999999999998",
              v: 16,
-      lset: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0002899999999999998"),
+              lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0002899999999999998"),
              t: int64p(1234568),
              ct: int64p(1000),
            },
            {
-      m: "test_histogram_bucket\xffle\xff+Inf",
+              m: metric + "_bucket\xffle\xff+Inf",
              v: 175,
-      lset: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"),
+              lset: labels.FromStrings("__name__", metric+"_bucket", "le", "+Inf"),
              t: int64p(1234568),
              ct: int64p(1000),
            },
+          }
+          tc.exp = append(tc.exp, classicSeries...)
+        }
+        if req.expectNHCB {
+          // Always expect NHCB series after classic.
+          nhcbSeries := []parsedEntry{
            {
-      // TODO(krajorama): optimize: this should not be here. In case there's
-      // an exponential histogram we should not convert the classic histogram
-      // to NHCB. In the end TSDB will throw this away with
-      // storage.errDuplicateSampleForTimestamp error at Commit(), but it
-      // is better to avoid this conversion in the first place.
-      m: "test_histogram{}",
+              m: metric + "{}",
              shs: &histogram.Histogram{
                Schema: histogram.CustomBucketsSchema,
                Count: 175,
@@ -614,18 +692,35 @@ func TestNHCBParserProtoBufParser_NoNHCBWhenExponential(t *testing.T) {
                PositiveBuckets: []int64{2, 0, 10, 147},
                CustomValues: []float64{-0.0004899999999999998, -0.0003899999999999998, -0.0002899999999999998},
              },
-      lset: labels.FromStrings("__name__", "test_histogram"),
+              lset: labels.FromStrings("__name__", metric),
              t: int64p(1234568),
              ct: int64p(1000),
            },
          }
+          tc.exp = append(tc.exp, nhcbSeries...)
+        }
+      }
+      testCases = append(testCases, tc)
+    }
+  }
+
+  inputBuf := createTestProtoBufHistogram(t)
+  for _, tc := range testCases {
+    t.Run(tc.name, func(t *testing.T) {
+      p := NewProtobufParser(inputBuf.Bytes(), tc.classic, labels.NewSymbolTable())
+      if tc.nhcb {
+        p = NewNHCBParser(p, labels.NewSymbolTable(), tc.classic)
+      }
      got := testParse(t, p)
-  requireEntries(t, exp, got)
+      requireEntries(t, tc.exp, got)
+    })
+  }
 }
 
 func createTestProtoBufHistogram(t *testing.T) *bytes.Buffer {
-  testMetricFamilies := []string{`name: "test_histogram"
-help: "Test histogram with classic and exponential buckets."
+  testMetricFamilies := []string{`name: "test_histogram1"
+help: "Test histogram 1"
 type: HISTOGRAM
 metric: <
 histogram: <
@@ -647,6 +742,72 @@ metric: <
 cumulative_count: 16
 upper_bound: -0.0002899999999999998
 >
+>
+timestamp_ms: 1234568
+>`, `name: "test_histogram2"
+help: "Test histogram 2"
+type: HISTOGRAM
+metric: <
+histogram: <
+created_timestamp: <
+seconds: 1
+nanos: 1
+>
+sample_count: 175
+sample_sum: 0.0008280461746287094
+bucket: <
+cumulative_count: 2
+upper_bound: -0.0004899999999999998
+>
+bucket: <
+cumulative_count: 4
+upper_bound: -0.0003899999999999998
+>
+bucket: <
+cumulative_count: 16
+upper_bound: -0.0002899999999999998
+>
+schema: 3
+zero_threshold: 2.938735877055719e-39
+zero_count: 2
+negative_span: <
+offset: -162
+length: 1
+>
+negative_span: <
+offset: 23
+length: 4
+>
+negative_delta: 1
+negative_delta: 3
+negative_delta: -2
+negative_delta: -1
+negative_delta: 1
+positive_span: <
+offset: -161
+length: 1
+>
+positive_span: <
+offset: 8
+length: 3
+>
+positive_delta: 1
+positive_delta: 2
+positive_delta: -1
+positive_delta: -1
+>
+timestamp_ms: 1234568
+>`, `name: "test_histogram3"
+help: "Test histogram 3"
+type: HISTOGRAM
+metric: <
+histogram: <
+created_timestamp: <
+seconds: 1
+nanos: 1
+>
+sample_count: 175
+sample_sum: 0.0008280461746287094
 schema: 3
 zero_threshold: 2.938735877055719e-39
 zero_count: 2