mirror of
https://github.com/prometheus/node_exporter.git
synced 2025-02-02 08:42:31 -08:00
Handle vanishing PIDs (#1043)
PIDs can vanish (exit) from /proc/ between gathering the list of PIDs and getting all of their stats.

* Ignore file not found errors.
* Explicitly count the PIDs we find.
* Clean up some error style issues.

Signed-off-by: Ben Kochie <superq@gmail.com>
This commit is contained in:
parent
099c1527f1
commit
fe5a117831
|
@@ -14,9 +14,9 @@ The wifi collector is disabled by default due to suspected caching issues and go
|
||||||
* [FEATURE] Add socket unit stats to systemd collector #968
|
* [FEATURE] Add socket unit stats to systemd collector #968
|
||||||
* [FEATURE] Collect start time for systemd units
|
* [FEATURE] Collect start time for systemd units
|
||||||
* [ENHANCEMENT]
|
* [ENHANCEMENT]
|
||||||
* [BUGFIX]
|
|
||||||
|
|
||||||
* [BUGFIX] Fix goroutine leak in supervisord collector
|
* [BUGFIX] Fix goroutine leak in supervisord collector
|
||||||
|
* [BUGFIX] Handle vanishing PIDs #1043
|
||||||
|
|
||||||
## 0.16.0 / 2018-05-15
|
## 0.16.0 / 2018-05-15
|
||||||
|
|
||||||
|
|
0
collector/fixtures/proc/11/.missing_stat
Normal file
0
collector/fixtures/proc/11/.missing_stat
Normal file
|
@@ -17,7 +17,10 @@ package collector
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
"github.com/prometheus/common/log"
|
||||||
"github.com/prometheus/procfs"
|
"github.com/prometheus/procfs"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -62,13 +65,13 @@ func NewProcessStatCollector() (Collector, error) {
|
||||||
func (t *processCollector) Update(ch chan<- prometheus.Metric) error {
|
func (t *processCollector) Update(ch chan<- prometheus.Metric) error {
|
||||||
pids, states, threads, err := getAllocatedThreads()
|
pids, states, threads, err := getAllocatedThreads()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Unable to retrieve number of allocated threads %v\n", err)
|
return fmt.Errorf("unable to retrieve number of allocated threads: %q", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
ch <- prometheus.MustNewConstMetric(t.threadAlloc, prometheus.GaugeValue, float64(threads))
|
ch <- prometheus.MustNewConstMetric(t.threadAlloc, prometheus.GaugeValue, float64(threads))
|
||||||
maxThreads, err := readUintFromFile(procFilePath("sys/kernel/threads-max"))
|
maxThreads, err := readUintFromFile(procFilePath("sys/kernel/threads-max"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Unable to retrieve limit number of threads %v\n", err)
|
return fmt.Errorf("unable to retrieve limit number of threads: %q", err)
|
||||||
}
|
}
|
||||||
ch <- prometheus.MustNewConstMetric(t.threadLimit, prometheus.GaugeValue, float64(maxThreads))
|
ch <- prometheus.MustNewConstMetric(t.threadLimit, prometheus.GaugeValue, float64(maxThreads))
|
||||||
|
|
||||||
|
@@ -78,7 +81,7 @@ func (t *processCollector) Update(ch chan<- prometheus.Metric) error {
|
||||||
|
|
||||||
pidM, err := readUintFromFile(procFilePath("sys/kernel/pid_max"))
|
pidM, err := readUintFromFile(procFilePath("sys/kernel/pid_max"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Unable to retrieve limit number of maximum pids alloved %v\n", err)
|
return fmt.Errorf("unable to retrieve limit number of maximum pids alloved: %q", err)
|
||||||
}
|
}
|
||||||
ch <- prometheus.MustNewConstMetric(t.pidUsed, prometheus.GaugeValue, float64(pids))
|
ch <- prometheus.MustNewConstMetric(t.pidUsed, prometheus.GaugeValue, float64(pids))
|
||||||
ch <- prometheus.MustNewConstMetric(t.pidMax, prometheus.GaugeValue, float64(pidM))
|
ch <- prometheus.MustNewConstMetric(t.pidMax, prometheus.GaugeValue, float64(pidM))
|
||||||
|
@@ -95,15 +98,22 @@ func getAllocatedThreads() (int, map[string]int32, int, error) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, nil, 0, err
|
return 0, nil, 0, err
|
||||||
}
|
}
|
||||||
|
pids := 0
|
||||||
thread := 0
|
thread := 0
|
||||||
procStates := make(map[string]int32)
|
procStates := make(map[string]int32)
|
||||||
for _, pid := range p {
|
for _, pid := range p {
|
||||||
stat, err := pid.NewStat()
|
stat, err := pid.NewStat()
|
||||||
|
// PIDs can vanish between getting the list and getting stats.
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
log.Debugf("file not found when retrieving stats: %q", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, nil, 0, err
|
return 0, nil, 0, err
|
||||||
}
|
}
|
||||||
|
pids += 1
|
||||||
procStates[stat.State] += 1
|
procStates[stat.State] += 1
|
||||||
thread += stat.NumThreads
|
thread += stat.NumThreads
|
||||||
}
|
}
|
||||||
return len(p), procStates, thread, nil
|
return pids, procStates, thread, nil
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue