2018-06-05 10:38:32 -07:00
|
|
|
// Copyright 2018 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
|
2021-10-03 04:35:24 -07:00
|
|
|
//go:build !noprocesses
|
2018-06-05 10:38:32 -07:00
|
|
|
// +build !noprocesses
|
|
|
|
|
|
|
|
package collector
|
|
|
|
|
|
|
|
import (
|
2020-06-15 13:27:14 -07:00
|
|
|
"errors"
|
2018-06-05 10:38:32 -07:00
|
|
|
"fmt"
|
2024-09-11 01:51:28 -07:00
|
|
|
"log/slog"
|
2018-08-13 08:27:23 -07:00
|
|
|
"os"
|
2021-10-08 01:39:26 -07:00
|
|
|
"path"
|
|
|
|
"strconv"
|
2021-01-24 16:15:59 -08:00
|
|
|
"strings"
|
|
|
|
"syscall"
|
2018-08-13 08:27:23 -07:00
|
|
|
|
2018-06-05 10:38:32 -07:00
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
|
|
"github.com/prometheus/procfs"
|
|
|
|
)
|
|
|
|
|
|
|
|
type processCollector struct {
|
2021-10-08 01:39:26 -07:00
|
|
|
fs procfs.FS
|
|
|
|
threadAlloc *prometheus.Desc
|
|
|
|
threadLimit *prometheus.Desc
|
|
|
|
threadsState *prometheus.Desc
|
|
|
|
procsState *prometheus.Desc
|
|
|
|
pidUsed *prometheus.Desc
|
|
|
|
pidMax *prometheus.Desc
|
2024-09-11 01:51:28 -07:00
|
|
|
logger *slog.Logger
|
2018-06-05 10:38:32 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
registerCollector("processes", defaultDisabled, NewProcessStatCollector)
|
|
|
|
}
|
|
|
|
|
2018-10-15 09:44:06 -07:00
|
|
|
// NewProcessStatCollector returns a new Collector exposing process data read from the proc filesystem.
|
2024-09-11 01:51:28 -07:00
|
|
|
func NewProcessStatCollector(logger *slog.Logger) (Collector, error) {
|
2019-04-10 09:16:12 -07:00
|
|
|
fs, err := procfs.NewFS(*procPath)
|
|
|
|
if err != nil {
|
2019-11-29 05:51:31 -08:00
|
|
|
return nil, fmt.Errorf("failed to open procfs: %w", err)
|
2019-04-10 09:16:12 -07:00
|
|
|
}
|
2018-06-05 10:38:32 -07:00
|
|
|
subsystem := "processes"
|
|
|
|
return &processCollector{
|
2019-04-10 09:16:12 -07:00
|
|
|
fs: fs,
|
2018-06-05 10:38:32 -07:00
|
|
|
threadAlloc: prometheus.NewDesc(
|
|
|
|
prometheus.BuildFQName(namespace, subsystem, "threads"),
|
|
|
|
"Allocated threads in system",
|
|
|
|
nil, nil,
|
|
|
|
),
|
|
|
|
threadLimit: prometheus.NewDesc(
|
|
|
|
prometheus.BuildFQName(namespace, subsystem, "max_threads"),
|
|
|
|
"Limit of threads in the system",
|
|
|
|
nil, nil,
|
|
|
|
),
|
2021-10-08 01:39:26 -07:00
|
|
|
threadsState: prometheus.NewDesc(
|
|
|
|
prometheus.BuildFQName(namespace, subsystem, "threads_state"),
|
|
|
|
"Number of threads in each state.",
|
|
|
|
[]string{"thread_state"}, nil,
|
|
|
|
),
|
2018-06-05 10:38:32 -07:00
|
|
|
procsState: prometheus.NewDesc(
|
|
|
|
prometheus.BuildFQName(namespace, subsystem, "state"),
|
|
|
|
"Number of processes in each state.",
|
|
|
|
[]string{"state"}, nil,
|
|
|
|
),
|
|
|
|
pidUsed: prometheus.NewDesc(prometheus.BuildFQName(namespace, subsystem, "pids"),
|
|
|
|
"Number of PIDs", nil, nil,
|
|
|
|
),
|
|
|
|
pidMax: prometheus.NewDesc(prometheus.BuildFQName(namespace, subsystem, "max_processes"),
|
|
|
|
"Number of max PIDs limit", nil, nil,
|
|
|
|
),
|
2019-12-31 08:19:37 -08:00
|
|
|
logger: logger,
|
2018-06-05 10:38:32 -07:00
|
|
|
}, nil
|
|
|
|
}
|
2019-04-10 09:16:12 -07:00
|
|
|
func (c *processCollector) Update(ch chan<- prometheus.Metric) error {
|
2021-10-08 01:39:26 -07:00
|
|
|
pids, states, threads, threadStates, err := c.getAllocatedThreads()
|
2018-06-05 10:38:32 -07:00
|
|
|
if err != nil {
|
2020-06-15 13:27:14 -07:00
|
|
|
return fmt.Errorf("unable to retrieve number of allocated threads: %w", err)
|
2018-06-05 10:38:32 -07:00
|
|
|
}
|
|
|
|
|
2019-04-10 09:16:12 -07:00
|
|
|
ch <- prometheus.MustNewConstMetric(c.threadAlloc, prometheus.GaugeValue, float64(threads))
|
2018-06-05 10:38:32 -07:00
|
|
|
maxThreads, err := readUintFromFile(procFilePath("sys/kernel/threads-max"))
|
|
|
|
if err != nil {
|
2020-06-15 13:27:14 -07:00
|
|
|
return fmt.Errorf("unable to retrieve limit number of threads: %w", err)
|
2018-06-05 10:38:32 -07:00
|
|
|
}
|
2019-04-10 09:16:12 -07:00
|
|
|
ch <- prometheus.MustNewConstMetric(c.threadLimit, prometheus.GaugeValue, float64(maxThreads))
|
2018-06-05 10:38:32 -07:00
|
|
|
|
|
|
|
for state := range states {
|
2019-04-10 09:16:12 -07:00
|
|
|
ch <- prometheus.MustNewConstMetric(c.procsState, prometheus.GaugeValue, float64(states[state]), state)
|
2018-06-05 10:38:32 -07:00
|
|
|
}
|
|
|
|
|
2021-10-08 01:39:26 -07:00
|
|
|
for state := range threadStates {
|
|
|
|
ch <- prometheus.MustNewConstMetric(c.threadsState, prometheus.GaugeValue, float64(threadStates[state]), state)
|
|
|
|
}
|
|
|
|
|
2018-06-05 10:38:32 -07:00
|
|
|
pidM, err := readUintFromFile(procFilePath("sys/kernel/pid_max"))
|
|
|
|
if err != nil {
|
2020-06-15 13:27:14 -07:00
|
|
|
return fmt.Errorf("unable to retrieve limit number of maximum pids alloved: %w", err)
|
2018-06-05 10:38:32 -07:00
|
|
|
}
|
2019-04-10 09:16:12 -07:00
|
|
|
ch <- prometheus.MustNewConstMetric(c.pidUsed, prometheus.GaugeValue, float64(pids))
|
|
|
|
ch <- prometheus.MustNewConstMetric(c.pidMax, prometheus.GaugeValue, float64(pidM))
|
2018-06-05 10:38:32 -07:00
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-10-08 01:39:26 -07:00
|
|
|
func (c *processCollector) getAllocatedThreads() (int, map[string]int32, int, map[string]int32, error) {
|
2019-04-10 09:16:12 -07:00
|
|
|
p, err := c.fs.AllProcs()
|
2018-06-05 10:38:32 -07:00
|
|
|
if err != nil {
|
2021-10-08 01:39:26 -07:00
|
|
|
return 0, nil, 0, nil, fmt.Errorf("unable to list all processes: %w", err)
|
2018-06-05 10:38:32 -07:00
|
|
|
}
|
2018-08-13 08:27:23 -07:00
|
|
|
pids := 0
|
2018-06-05 10:38:32 -07:00
|
|
|
thread := 0
|
|
|
|
procStates := make(map[string]int32)
|
2021-10-08 01:39:26 -07:00
|
|
|
threadStates := make(map[string]int32)
|
|
|
|
|
2018-06-05 10:38:32 -07:00
|
|
|
for _, pid := range p {
|
2019-06-12 11:47:16 -07:00
|
|
|
stat, err := pid.Stat()
|
2018-06-05 10:38:32 -07:00
|
|
|
if err != nil {
|
2021-01-25 07:29:59 -08:00
|
|
|
// PIDs can vanish between getting the list and getting stats.
|
2021-10-08 01:39:26 -07:00
|
|
|
if c.isIgnoredError(err) {
|
2024-09-11 01:51:28 -07:00
|
|
|
c.logger.Debug("file not found when retrieving stats for pid", "pid", pid.PID, "err", err)
|
2021-01-25 07:29:59 -08:00
|
|
|
continue
|
|
|
|
}
|
2024-09-11 01:51:28 -07:00
|
|
|
c.logger.Debug("error reading stat for pid", "pid", pid.PID, "err", err)
|
2021-10-08 01:39:26 -07:00
|
|
|
return 0, nil, 0, nil, fmt.Errorf("error reading stat for pid %d: %w", pid.PID, err)
|
2018-06-05 10:38:32 -07:00
|
|
|
}
|
2018-10-15 09:44:06 -07:00
|
|
|
pids++
|
|
|
|
procStates[stat.State]++
|
2018-06-05 10:38:32 -07:00
|
|
|
thread += stat.NumThreads
|
2021-10-08 01:39:26 -07:00
|
|
|
err = c.getThreadStates(pid.PID, stat, threadStates)
|
|
|
|
if err != nil {
|
|
|
|
return 0, nil, 0, nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return pids, procStates, thread, threadStates, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *processCollector) getThreadStates(pid int, pidStat procfs.ProcStat, threadStates map[string]int32) error {
|
|
|
|
fs, err := procfs.NewFS(procFilePath(path.Join(strconv.Itoa(pid), "task")))
|
|
|
|
if err != nil {
|
|
|
|
if c.isIgnoredError(err) {
|
2024-09-11 01:51:28 -07:00
|
|
|
c.logger.Debug("file not found when retrieving tasks for pid", "pid", pid, "err", err)
|
2021-10-08 01:39:26 -07:00
|
|
|
return nil
|
|
|
|
}
|
2024-09-11 01:51:28 -07:00
|
|
|
c.logger.Debug("error reading tasks for pid", "pid", pid, "err", err)
|
2021-10-08 01:39:26 -07:00
|
|
|
return fmt.Errorf("error reading task for pid %d: %w", pid, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
t, err := fs.AllProcs()
|
|
|
|
if err != nil {
|
|
|
|
if c.isIgnoredError(err) {
|
2024-09-11 01:51:28 -07:00
|
|
|
c.logger.Debug("file not found when retrieving tasks for pid", "pid", pid, "err", err)
|
2021-10-08 01:39:26 -07:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
return fmt.Errorf("unable to list all threads for pid: %d %w", pid, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, thread := range t {
|
|
|
|
if pid == thread.PID {
|
|
|
|
threadStates[pidStat.State]++
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
threadStat, err := thread.Stat()
|
|
|
|
if err != nil {
|
|
|
|
if c.isIgnoredError(err) {
|
2024-09-11 01:51:28 -07:00
|
|
|
c.logger.Debug("file not found when retrieving stats for thread", "pid", pid, "threadId", thread.PID, "err", err)
|
2021-10-08 01:39:26 -07:00
|
|
|
continue
|
|
|
|
}
|
2024-09-11 01:51:28 -07:00
|
|
|
c.logger.Debug("error reading stat for thread", "pid", pid, "threadId", thread.PID, "err", err)
|
2021-10-08 01:39:26 -07:00
|
|
|
return fmt.Errorf("error reading stat for pid:%d thread:%d err:%w", pid, thread.PID, err)
|
|
|
|
}
|
|
|
|
threadStates[threadStat.State]++
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *processCollector) isIgnoredError(err error) bool {
|
|
|
|
if errors.Is(err, os.ErrNotExist) || strings.Contains(err.Error(), syscall.ESRCH.Error()) {
|
|
|
|
return true
|
2018-06-05 10:38:32 -07:00
|
|
|
}
|
2021-10-08 01:39:26 -07:00
|
|
|
return false
|
2018-06-05 10:38:32 -07:00
|
|
|
}
|