mirror of
https://github.com/prometheus/prometheus.git
synced 2024-11-14 17:44:06 -08:00
Improve remote storage queue manager metrics.
This commit is contained in:
parent
9a6e7b3e3b
commit
09b557a085
|
@ -62,7 +62,8 @@ type StorageQueueManager struct {
|
||||||
|
|
||||||
samplesCount *prometheus.CounterVec
|
samplesCount *prometheus.CounterVec
|
||||||
sendLatency prometheus.Summary
|
sendLatency prometheus.Summary
|
||||||
sendErrors prometheus.Counter
|
failedBatches prometheus.Counter
|
||||||
|
failedSamples prometheus.Counter
|
||||||
queueLength prometheus.Gauge
|
queueLength prometheus.Gauge
|
||||||
queueCapacity prometheus.Metric
|
queueCapacity prometheus.Metric
|
||||||
}
|
}
|
||||||
|
@ -92,15 +93,22 @@ func NewStorageQueueManager(tsdb StorageClient, queueCapacity int) *StorageQueue
|
||||||
sendLatency: prometheus.NewSummary(prometheus.SummaryOpts{
|
sendLatency: prometheus.NewSummary(prometheus.SummaryOpts{
|
||||||
Namespace: namespace,
|
Namespace: namespace,
|
||||||
Subsystem: subsystem,
|
Subsystem: subsystem,
|
||||||
Name: "sent_latency_milliseconds",
|
Name: "send_latency_seconds",
|
||||||
Help: "Latency quantiles for sending sample batches to the remote storage.",
|
Help: "Latency quantiles for sending sample batches to the remote storage.",
|
||||||
ConstLabels: constLabels,
|
ConstLabels: constLabels,
|
||||||
}),
|
}),
|
||||||
sendErrors: prometheus.NewCounter(prometheus.CounterOpts{
|
failedBatches: prometheus.NewCounter(prometheus.CounterOpts{
|
||||||
Namespace: namespace,
|
Namespace: namespace,
|
||||||
Subsystem: subsystem,
|
Subsystem: subsystem,
|
||||||
Name: "sent_errors_total",
|
Name: "failed_batches_total",
|
||||||
Help: "Total number of errors sending sample batches to the remote storage.",
|
Help: "Total number of sample batches that encountered an error while being sent to the remote storage.",
|
||||||
|
ConstLabels: constLabels,
|
||||||
|
}),
|
||||||
|
failedSamples: prometheus.NewCounter(prometheus.CounterOpts{
|
||||||
|
Namespace: namespace,
|
||||||
|
Subsystem: subsystem,
|
||||||
|
Name: "failed_samples_total",
|
||||||
|
Help: "Total number of samples that encountered an error while being sent to the remote storage.",
|
||||||
ConstLabels: constLabels,
|
ConstLabels: constLabels,
|
||||||
}),
|
}),
|
||||||
queueLength: prometheus.NewGauge(prometheus.GaugeOpts{
|
queueLength: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||||
|
@ -151,6 +159,8 @@ func (t *StorageQueueManager) Stop() {
|
||||||
func (t *StorageQueueManager) Describe(ch chan<- *prometheus.Desc) {
|
func (t *StorageQueueManager) Describe(ch chan<- *prometheus.Desc) {
|
||||||
t.samplesCount.Describe(ch)
|
t.samplesCount.Describe(ch)
|
||||||
t.sendLatency.Describe(ch)
|
t.sendLatency.Describe(ch)
|
||||||
|
ch <- t.failedBatches.Desc()
|
||||||
|
ch <- t.failedSamples.Desc()
|
||||||
ch <- t.queueLength.Desc()
|
ch <- t.queueLength.Desc()
|
||||||
ch <- t.queueCapacity.Desc()
|
ch <- t.queueCapacity.Desc()
|
||||||
}
|
}
|
||||||
|
@ -160,6 +170,8 @@ func (t *StorageQueueManager) Collect(ch chan<- prometheus.Metric) {
|
||||||
t.samplesCount.Collect(ch)
|
t.samplesCount.Collect(ch)
|
||||||
t.sendLatency.Collect(ch)
|
t.sendLatency.Collect(ch)
|
||||||
t.queueLength.Set(float64(len(t.queue)))
|
t.queueLength.Set(float64(len(t.queue)))
|
||||||
|
ch <- t.failedBatches
|
||||||
|
ch <- t.failedSamples
|
||||||
ch <- t.queueLength
|
ch <- t.queueLength
|
||||||
ch <- t.queueCapacity
|
ch <- t.queueCapacity
|
||||||
}
|
}
|
||||||
|
@ -175,13 +187,14 @@ func (t *StorageQueueManager) sendSamples(s model.Samples) {
|
||||||
// floor.
|
// floor.
|
||||||
begin := time.Now()
|
begin := time.Now()
|
||||||
err := t.tsdb.Store(s)
|
err := t.tsdb.Store(s)
|
||||||
duration := time.Since(begin) / time.Millisecond
|
duration := time.Since(begin) / time.Second
|
||||||
|
|
||||||
labelValue := success
|
labelValue := success
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warnf("error sending %d samples to remote storage: %s", len(s), err)
|
log.Warnf("error sending %d samples to remote storage: %s", len(s), err)
|
||||||
labelValue = failure
|
labelValue = failure
|
||||||
t.sendErrors.Inc()
|
t.failedBatches.Inc()
|
||||||
|
t.failedSamples.Add(float64(len(s)))
|
||||||
}
|
}
|
||||||
t.samplesCount.WithLabelValues(labelValue).Add(float64(len(s)))
|
t.samplesCount.WithLabelValues(labelValue).Add(float64(len(s)))
|
||||||
t.sendLatency.Observe(float64(duration))
|
t.sendLatency.Observe(float64(duration))
|
||||||
|
|
Loading…
Reference in a new issue