Improve remote storage queue manager metrics.
commit 09b557a085
parent 9a6e7b3e3b
@@ -62,7 +62,8 @@ type StorageQueueManager struct {
 	samplesCount  *prometheus.CounterVec
 	sendLatency   prometheus.Summary
-	sendErrors    prometheus.Counter
+	failedBatches prometheus.Counter
+	failedSamples prometheus.Counter
 	queueLength   prometheus.Gauge
 	queueCapacity prometheus.Metric
 }
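
Worth noting from this hunk: queueCapacity is typed as a plain prometheus.Metric while its siblings are live instruments, i.e. it is a constant metric whose value never changes after construction. A minimal sketch of that pattern, assuming client_golang's MustNewConstMetric; the constructor for this field sits outside the hunk, so the metric name, help text, and value below are illustrative, not taken from this commit:

    package main

    import (
        "fmt"

        "github.com/prometheus/client_golang/prometheus"
    )

    // newQueueCapacityMetric is a hypothetical helper: the name and help
    // string are invented for this sketch.
    func newQueueCapacityMetric(capacity int) prometheus.Metric {
        desc := prometheus.NewDesc(
            "example_queue_capacity",
            "The capacity of the queue of samples to be sent to remote storage.",
            nil, nil,
        )
        // A const metric carries a fixed value forever, so Collect can
        // send it on the channel as-is; no Set/Inc calls are needed.
        return prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(capacity))
    }

    func main() {
        m := newQueueCapacityMetric(100)
        fmt.Println(m.Desc()) // ready to emit from a Collect method
    }
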
@@ -92,15 +93,22 @@ func NewStorageQueueManager(tsdb StorageClient, queueCapacity int) *StorageQueueManager {
 		sendLatency: prometheus.NewSummary(prometheus.SummaryOpts{
 			Namespace:   namespace,
 			Subsystem:   subsystem,
-			Name:        "sent_latency_milliseconds",
+			Name:        "send_latency_seconds",
 			Help:        "Latency quantiles for sending sample batches to the remote storage.",
 			ConstLabels: constLabels,
 		}),
-		sendErrors: prometheus.NewCounter(prometheus.CounterOpts{
+		failedBatches: prometheus.NewCounter(prometheus.CounterOpts{
 			Namespace:   namespace,
 			Subsystem:   subsystem,
-			Name:        "sent_errors_total",
-			Help:        "Total number of errors sending sample batches to the remote storage.",
+			Name:        "failed_batches_total",
+			Help:        "Total number of sample batches that encountered an error while being sent to the remote storage.",
 			ConstLabels: constLabels,
 		}),
+		failedSamples: prometheus.NewCounter(prometheus.CounterOpts{
+			Namespace:   namespace,
+			Subsystem:   subsystem,
+			Name:        "failed_samples_total",
+			Help:        "Total number of samples that encountered an error while being sent to the remote storage.",
+			ConstLabels: constLabels,
+		}),
 		queueLength: prometheus.NewGauge(prometheus.GaugeOpts{
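
Two renames happen above. The latency Summary moves from sent_latency_milliseconds to send_latency_seconds, in line with the Prometheus convention that durations are exposed in base units (seconds). And the single sent_errors_total counter becomes two: failed_batches_total counts whole batches that failed, while failed_samples_total counts the individual samples inside them, so both failure granularities can be rated independently. client_golang joins the Namespace, Subsystem, and Name fields with underscores to form the fully qualified metric name; a sketch of that below, where the namespace and subsystem values are assumptions since the constants are defined elsewhere in this file:

    package main

    import (
        "fmt"

        "github.com/prometheus/client_golang/prometheus"
    )

    func main() {
        // Assumed values for the file-level constants (not shown in this diff):
        const namespace, subsystem = "prometheus", "remote_storage"
        fmt.Println(prometheus.BuildFQName(namespace, subsystem, "failed_samples_total"))
        // Under those assumptions, prints:
        // prometheus_remote_storage_failed_samples_total
    }
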
@@ -151,6 +159,8 @@ func (t *StorageQueueManager) Stop() {
 func (t *StorageQueueManager) Describe(ch chan<- *prometheus.Desc) {
 	t.samplesCount.Describe(ch)
 	t.sendLatency.Describe(ch)
+	ch <- t.failedBatches.Desc()
+	ch <- t.failedSamples.Desc()
 	ch <- t.queueLength.Desc()
 	ch <- t.queueCapacity.Desc()
 }
@@ -160,6 +170,8 @@ func (t *StorageQueueManager) Collect(ch chan<- prometheus.Metric) {
 	t.samplesCount.Collect(ch)
 	t.sendLatency.Collect(ch)
 	t.queueLength.Set(float64(len(t.queue)))
+	ch <- t.failedBatches
+	ch <- t.failedSamples
 	ch <- t.queueLength
 	ch <- t.queueCapacity
 }
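
Together, the Describe and Collect hunks keep StorageQueueManager a valid hand-written prometheus.Collector: Describe forwards a *prometheus.Desc for each new counter, and Collect refreshes the queue-length gauge at scrape time before sending every metric on the channel (counters and gauges can be sent directly because they satisfy prometheus.Metric). A self-contained sketch of the same pattern, with purely illustrative names:

    package main

    import "github.com/prometheus/client_golang/prometheus"

    // queueCollector mirrors the shape of the code above with invented names.
    type queueCollector struct {
        queue       chan int
        queueLength prometheus.Gauge
        failed      prometheus.Counter
    }

    func (c *queueCollector) Describe(ch chan<- *prometheus.Desc) {
        ch <- c.queueLength.Desc()
        ch <- c.failed.Desc()
    }

    func (c *queueCollector) Collect(ch chan<- prometheus.Metric) {
        // As with queueLength in the diff: sample the depth at scrape time.
        c.queueLength.Set(float64(len(c.queue)))
        ch <- c.queueLength
        ch <- c.failed
    }

    func main() {
        c := &queueCollector{
            queue: make(chan int, 100),
            queueLength: prometheus.NewGauge(prometheus.GaugeOpts{
                Name: "example_queue_length", Help: "Current number of queued items.",
            }),
            failed: prometheus.NewCounter(prometheus.CounterOpts{
                Name: "example_failed_total", Help: "Total failed items.",
            }),
        }
        // One registration exposes everything the Collector describes.
        prometheus.MustRegister(c)
    }
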
@@ -175,13 +187,14 @@ func (t *StorageQueueManager) sendSamples(s model.Samples) {
 	// floor.
 	begin := time.Now()
 	err := t.tsdb.Store(s)
-	duration := time.Since(begin) / time.Millisecond
+	duration := time.Since(begin) / time.Second

 	labelValue := success
 	if err != nil {
 		log.Warnf("error sending %d samples to remote storage: %s", len(s), err)
 		labelValue = failure
-		t.sendErrors.Inc()
+		t.failedBatches.Inc()
+		t.failedSamples.Add(float64(len(s)))
 	}
 	t.samplesCount.WithLabelValues(labelValue).Add(float64(len(s)))
 	t.sendLatency.Observe(float64(duration))
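
One subtlety in the last hunk: dividing one time.Duration by another is integer division in Go, so time.Since(begin) / time.Second yields whole elapsed seconds and discards the fraction. That is what the pre-existing "// floor." comment refers to, and it means sends completing in under a second are observed as 0. A self-contained illustration:

    package main

    import (
        "fmt"
        "time"
    )

    func main() {
        elapsed := 1500 * time.Millisecond
        // Duration / Duration is integer division: the 500ms remainder is dropped.
        fmt.Println(int64(elapsed / time.Second)) // 1
        // The float64 form would keep the fraction:
        fmt.Println(elapsed.Seconds()) // 1.5
    }

Duration's Seconds method would preserve sub-second resolution, but the commit changes only the unit and leaves the existing floor behavior as it was.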