mirror of
https://github.com/prometheus/node_exporter.git
synced 2025-08-20 18:33:52 -07:00
Add Collection Timeout
Signed-off-by: Mitanshu Mittal <mitanshu.sheo@gmail.com>
This commit is contained in:
parent
0c10545e8d
commit
c2bb15b146
|
@ -15,6 +15,7 @@
|
||||||
package collector
|
package collector
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
@ -27,6 +28,7 @@ import (
|
||||||
|
|
||||||
// Namespace defines the common namespace to be used by all metrics.
|
// Namespace defines the common namespace to be used by all metrics.
|
||||||
const namespace = "node"
|
const namespace = "node"
|
||||||
|
const collectorTimeout = 20 * time.Second
|
||||||
|
|
||||||
var (
|
var (
|
||||||
scrapeDurationDesc = prometheus.NewDesc(
|
scrapeDurationDesc = prometheus.NewDesc(
|
||||||
|
@ -157,12 +159,56 @@ func (n NodeCollector) Collect(ch chan<- prometheus.Metric) {
|
||||||
|
|
||||||
func execute(name string, c Collector, ch chan<- prometheus.Metric, logger *slog.Logger) {
|
func execute(name string, c Collector, ch chan<- prometheus.Metric, logger *slog.Logger) {
|
||||||
begin := time.Now()
|
begin := time.Now()
|
||||||
err := c.Update(ch)
|
|
||||||
|
// Create a context with a 20-second timeout
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), collectorTimeout)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
// Create a separate channel for the collector to write to
|
||||||
|
collectorCh := make(chan prometheus.Metric, 100) // Buffered channel to reduce blocking
|
||||||
|
|
||||||
|
// Create a channel to receive the error from Update
|
||||||
|
errCh := make(chan error, 1)
|
||||||
|
|
||||||
|
// Run Update in a goroutine
|
||||||
|
go func() {
|
||||||
|
defer close(collectorCh)
|
||||||
|
errCh <- c.Update(collectorCh)
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Forward metrics from the collector to the channel
|
||||||
|
var err error
|
||||||
|
doForwarding := true
|
||||||
|
|
||||||
|
for doForwarding {
|
||||||
|
select {
|
||||||
|
case metric, ok := <-collectorCh:
|
||||||
|
if !ok {
|
||||||
|
// Channel closed, get the error from Update
|
||||||
|
err = <-errCh
|
||||||
|
doForwarding = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
sendMetricSafely(ch, metric, logger, name)
|
||||||
|
case <-ctx.Done():
|
||||||
|
err = ctx.Err()
|
||||||
|
doForwarding = false
|
||||||
|
// Start a goroutine to drain collectorCh without blocking
|
||||||
|
go func() {
|
||||||
|
for range collectorCh {
|
||||||
|
// Drain collectorCh
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
duration := time.Since(begin)
|
duration := time.Since(begin)
|
||||||
var success float64
|
var success float64
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if IsNoDataError(err) {
|
if err == context.DeadlineExceeded {
|
||||||
|
logger.Error("collector timed out", "name", name, "duration_seconds", duration.Seconds(), "err", err)
|
||||||
|
} else if IsNoDataError(err) {
|
||||||
logger.Debug("collector returned no data", "name", name, "duration_seconds", duration.Seconds(), "err", err)
|
logger.Debug("collector returned no data", "name", name, "duration_seconds", duration.Seconds(), "err", err)
|
||||||
} else {
|
} else {
|
||||||
logger.Error("collector failed", "name", name, "duration_seconds", duration.Seconds(), "err", err)
|
logger.Error("collector failed", "name", name, "duration_seconds", duration.Seconds(), "err", err)
|
||||||
|
@ -172,8 +218,18 @@ func execute(name string, c Collector, ch chan<- prometheus.Metric, logger *slog
|
||||||
logger.Debug("collector succeeded", "name", name, "duration_seconds", duration.Seconds())
|
logger.Debug("collector succeeded", "name", name, "duration_seconds", duration.Seconds())
|
||||||
success = 1
|
success = 1
|
||||||
}
|
}
|
||||||
ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, duration.Seconds(), name)
|
// Send the final metrics
|
||||||
ch <- prometheus.MustNewConstMetric(scrapeSuccessDesc, prometheus.GaugeValue, success, name)
|
sendMetricSafely(ch, prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, duration.Seconds(), name), logger, name)
|
||||||
|
sendMetricSafely(ch, prometheus.MustNewConstMetric(scrapeSuccessDesc, prometheus.GaugeValue, success, name), logger, name)
|
||||||
|
}
|
||||||
|
|
||||||
|
// sendMetricSafely sends a metric to the channel, logging a warning if it fails
|
||||||
|
func sendMetricSafely(ch chan<- prometheus.Metric, metric prometheus.Metric, logger *slog.Logger, name string) {
|
||||||
|
select {
|
||||||
|
case ch <- metric:
|
||||||
|
default:
|
||||||
|
logger.Warn("msg", "failed to send metric", "name", name)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Collector is the interface a collector has to implement.
|
// Collector is the interface a collector has to implement.
|
||||||
|
|
Loading…
Reference in a new issue