Improve remote storage queue manager metrics.

This commit is contained in:
Julius Volz 2015-09-16 16:22:18 +02:00 committed by Fabian Reinartz
parent 9a6e7b3e3b
commit 09b557a085

View file

@ -62,7 +62,8 @@ type StorageQueueManager struct {
samplesCount *prometheus.CounterVec
sendLatency prometheus.Summary
sendErrors prometheus.Counter
failedBatches prometheus.Counter
failedSamples prometheus.Counter
queueLength prometheus.Gauge
queueCapacity prometheus.Metric
}
@ -92,15 +93,22 @@ func NewStorageQueueManager(tsdb StorageClient, queueCapacity int) *StorageQueue
sendLatency: prometheus.NewSummary(prometheus.SummaryOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "sent_latency_milliseconds",
Name: "send_latency_seconds",
Help: "Latency quantiles for sending sample batches to the remote storage.",
ConstLabels: constLabels,
}),
sendErrors: prometheus.NewCounter(prometheus.CounterOpts{
failedBatches: prometheus.NewCounter(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "sent_errors_total",
Help: "Total number of errors sending sample batches to the remote storage.",
Name: "failed_batches_total",
Help: "Total number of sample batches that encountered an error while being sent to the remote storage.",
ConstLabels: constLabels,
}),
failedSamples: prometheus.NewCounter(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "failed_samples_total",
Help: "Total number of samples that encountered an error while being sent to the remote storage.",
ConstLabels: constLabels,
}),
queueLength: prometheus.NewGauge(prometheus.GaugeOpts{
@ -151,6 +159,8 @@ func (t *StorageQueueManager) Stop() {
func (t *StorageQueueManager) Describe(ch chan<- *prometheus.Desc) {
t.samplesCount.Describe(ch)
t.sendLatency.Describe(ch)
ch <- t.failedBatches.Desc()
ch <- t.failedSamples.Desc()
ch <- t.queueLength.Desc()
ch <- t.queueCapacity.Desc()
}
@ -160,6 +170,8 @@ func (t *StorageQueueManager) Collect(ch chan<- prometheus.Metric) {
t.samplesCount.Collect(ch)
t.sendLatency.Collect(ch)
t.queueLength.Set(float64(len(t.queue)))
ch <- t.failedBatches
ch <- t.failedSamples
ch <- t.queueLength
ch <- t.queueCapacity
}
@ -175,13 +187,14 @@ func (t *StorageQueueManager) sendSamples(s model.Samples) {
// floor.
begin := time.Now()
err := t.tsdb.Store(s)
duration := time.Since(begin) / time.Millisecond
duration := time.Since(begin) / time.Second
labelValue := success
if err != nil {
log.Warnf("error sending %d samples to remote storage: %s", len(s), err)
labelValue = failure
t.sendErrors.Inc()
t.failedBatches.Inc()
t.failedSamples.Add(float64(len(s)))
}
t.samplesCount.WithLabelValues(labelValue).Add(float64(len(s)))
t.sendLatency.Observe(float64(duration))