node_exporter/collector/processes_linux.go
Ben Kochie 5d2a4cf7fb
Fix processes collector long int parsing
Update procfs library to include ignored fields ParseInt handling.

Wrap error returns so that the user can know more about what failed.
Returns from getAllocatedThreads() are errors anyway.

Fixes: https://github.com/prometheus/node_exporter/issues/2110

Signed-off-by: Ben Kochie <superq@gmail.com>
2021-08-06 05:55:24 +02:00

130 lines
4.2 KiB
Go

// Copyright 2018 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// +build !noprocesses
package collector
import (
"errors"
"fmt"
"os"
"strings"
"syscall"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/procfs"
)
// processCollector collects process, thread and PID statistics through a
// procfs handle and exposes them as Prometheus metrics.
type processCollector struct {
	// fs is the procfs handle used to enumerate processes.
	fs procfs.FS
	// logger receives the collector's debug output.
	logger log.Logger

	// Metric descriptors, one per exported series.
	threadAlloc *prometheus.Desc // allocated threads in the system
	threadLimit *prometheus.Desc // limit of threads in the system
	procsState  *prometheus.Desc // number of processes per state (label "state")
	pidUsed     *prometheus.Desc // number of PIDs
	pidMax      *prometheus.Desc // maximum PIDs limit
}
// init registers the processes collector under its name, using
// NewProcessStatCollector as the factory and defaultDisabled as its default
// state.
func init() {
	const collectorName = "processes"
	registerCollector(collectorName, defaultDisabled, NewProcessStatCollector)
}
// NewProcessStatCollector returns a new Collector exposing process data read from the proc filesystem.
//
// The procfs handle is opened at *procPath; an error is returned if that
// fails. logger is stored on the collector for its debug output.
func NewProcessStatCollector(logger log.Logger) (Collector, error) {
	const subsystem = "processes"

	procFS, err := procfs.NewFS(*procPath)
	if err != nil {
		return nil, fmt.Errorf("failed to open procfs: %w", err)
	}

	// newDesc builds a descriptor whose fully-qualified name lives under this
	// collector's namespace and subsystem, with no constant labels.
	newDesc := func(name, help string, labels []string) *prometheus.Desc {
		fqName := prometheus.BuildFQName(namespace, subsystem, name)
		return prometheus.NewDesc(fqName, help, labels, nil)
	}

	collector := &processCollector{
		fs:          procFS,
		logger:      logger,
		threadAlloc: newDesc("threads", "Allocated threads in system", nil),
		threadLimit: newDesc("max_threads", "Limit of threads in the system", nil),
		procsState:  newDesc("state", "Number of processes in each state.", []string{"state"}),
		pidUsed:     newDesc("pids", "Number of PIDs", nil),
		pidMax:      newDesc("max_processes", "Number of max PIDs limit", nil),
	}
	return collector, nil
}
// Update gathers thread, process-state and PID statistics and sends them to
// ch as gauge metrics.
//
// Sources:
//   - c.getAllocatedThreads for the PID count, per-state process counts and
//     total thread count;
//   - sys/kernel/threads-max (resolved via procFilePath) for the thread limit;
//   - sys/kernel/pid_max (resolved via procFilePath) for the PID limit.
//
// Metrics are sent as soon as their value is known, so when a non-nil error
// is returned some metrics may already have been written to ch.
func (c *processCollector) Update(ch chan<- prometheus.Metric) error {
	pids, states, threads, err := c.getAllocatedThreads()
	if err != nil {
		return fmt.Errorf("unable to retrieve number of allocated threads: %w", err)
	}
	ch <- prometheus.MustNewConstMetric(c.threadAlloc, prometheus.GaugeValue, float64(threads))

	maxThreads, err := readUintFromFile(procFilePath("sys/kernel/threads-max"))
	if err != nil {
		return fmt.Errorf("unable to retrieve limit number of threads: %w", err)
	}
	ch <- prometheus.MustNewConstMetric(c.threadLimit, prometheus.GaugeValue, float64(maxThreads))

	// One sample per observed process state, labelled with the state string.
	for state, count := range states {
		ch <- prometheus.MustNewConstMetric(c.procsState, prometheus.GaugeValue, float64(count), state)
	}

	pidM, err := readUintFromFile(procFilePath("sys/kernel/pid_max"))
	if err != nil {
		return fmt.Errorf("unable to retrieve limit number of maximum pids allowed: %w", err)
	}
	ch <- prometheus.MustNewConstMetric(c.pidUsed, prometheus.GaugeValue, float64(pids))
	ch <- prometheus.MustNewConstMetric(c.pidMax, prometheus.GaugeValue, float64(pidM))

	return nil
}
// getAllocatedThreads iterates over every process returned by c.fs.AllProcs
// and aggregates its stat data.
//
// It returns, in order:
//   - the number of processes whose stat could be read,
//   - a map from process state (stat.State) to the number of processes in
//     that state,
//   - the sum of stat.NumThreads over those processes.
//
// Processes whose stat read fails with os.ErrNotExist, or with an error whose
// text contains the ESRCH message, are skipped. Any other failure aborts the
// walk and is returned wrapped, with zero counts and a nil map.
func (c *processCollector) getAllocatedThreads() (int, map[string]int32, int, error) {
	procs, err := c.fs.AllProcs()
	if err != nil {
		return 0, nil, 0, fmt.Errorf("unable to list all processes: %w", err)
	}

	pids := 0
	threads := 0
	procStates := make(map[string]int32)
	for _, proc := range procs {
		stat, err := proc.Stat()
		if err != nil {
			// PIDs can vanish between getting the list and getting stats.
			if errors.Is(err, os.ErrNotExist) || strings.Contains(err.Error(), syscall.ESRCH.Error()) {
				// Log the numeric PID, not the whole process handle, so the
				// field matches the one used in the error path below.
				level.Debug(c.logger).Log("msg", "file not found when retrieving stats for pid", "pid", proc.PID, "err", err)
				continue
			}
			level.Debug(c.logger).Log("msg", "error reading stat for pid", "pid", proc.PID, "err", err)
			return 0, nil, 0, fmt.Errorf("error reading stat for pid %d: %w", proc.PID, err)
		}
		pids++
		procStates[stat.State]++
		threads += stat.NumThreads
	}
	return pids, procStates, threads, nil
}