mirror of
https://github.com/prometheus/node_exporter.git
synced 2025-01-12 06:17:31 -08:00
0550ab3f04
Adds a count for TCP packets received out of orders. This can be an indication that there is packet loss on the way packets travel towards this server. In that case, the sender will retransmit (and we can already monitor the Tcp_RetransSegs there), but we have no way to monitor the packet loss on the receiver side. When a packet is received and the receiver detects previous one missing, it will increase the TCPOFOQueue counter and reply with selective ACK to the sender, both possible indications of packet loss. Confirmation of packet loss can be achieved by taking packet captures, ignoring wireshark analysis, and carefully looking at data being retransmitted based on the TCP seq. Just like RetransSegs, TCPOFOQueue should be interesting for any deployment as a mean to detect packet loss, so here suggesting adding it to the default list. Signed-off-by: François Rigault <frigo@amadeus.com> Co-authored-by: François Rigault <frigo@amadeus.com>
180 lines
4.9 KiB
Go
180 lines
4.9 KiB
Go
// Copyright 2015 The Prometheus Authors
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
//go:build !nonetstat
|
|
// +build !nonetstat
|
|
|
|
package collector
|
|
|
|
import (
|
|
"bufio"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/alecthomas/kingpin/v2"
|
|
"github.com/go-kit/log"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
)
|
|
|
|
const (
|
|
netStatsSubsystem = "netstat"
|
|
)
|
|
|
|
var (
|
|
netStatFields = kingpin.Flag("collector.netstat.fields", "Regexp of fields to return for netstat collector.").Default("^(.*_(InErrors|InErrs)|Ip_Forwarding|Ip(6|Ext)_(InOctets|OutOctets)|Icmp6?_(InMsgs|OutMsgs)|TcpExt_(Listen.*|Syncookies.*|TCPSynRetrans|TCPTimeouts|TCPOFOQueue)|Tcp_(ActiveOpens|InSegs|OutSegs|OutRsts|PassiveOpens|RetransSegs|CurrEstab)|Udp6?_(InDatagrams|OutDatagrams|NoPorts|RcvbufErrors|SndbufErrors))$").String()
|
|
)
|
|
|
|
type netStatCollector struct {
|
|
fieldPattern *regexp.Regexp
|
|
logger log.Logger
|
|
}
|
|
|
|
func init() {
|
|
registerCollector("netstat", defaultEnabled, NewNetStatCollector)
|
|
}
|
|
|
|
// NewNetStatCollector takes and returns
|
|
// a new Collector exposing network stats.
|
|
func NewNetStatCollector(logger log.Logger) (Collector, error) {
|
|
pattern := regexp.MustCompile(*netStatFields)
|
|
return &netStatCollector{
|
|
fieldPattern: pattern,
|
|
logger: logger,
|
|
}, nil
|
|
}
|
|
|
|
func (c *netStatCollector) Update(ch chan<- prometheus.Metric) error {
|
|
netStats, err := getNetStats(procFilePath("net/netstat"))
|
|
if err != nil {
|
|
return fmt.Errorf("couldn't get netstats: %w", err)
|
|
}
|
|
snmpStats, err := getNetStats(procFilePath("net/snmp"))
|
|
if err != nil {
|
|
return fmt.Errorf("couldn't get SNMP stats: %w", err)
|
|
}
|
|
snmp6Stats, err := getSNMP6Stats(procFilePath("net/snmp6"))
|
|
if err != nil {
|
|
return fmt.Errorf("couldn't get SNMP6 stats: %w", err)
|
|
}
|
|
// Merge the results of snmpStats into netStats (collisions are possible, but
|
|
// we know that the keys are always unique for the given use case).
|
|
for k, v := range snmpStats {
|
|
netStats[k] = v
|
|
}
|
|
for k, v := range snmp6Stats {
|
|
netStats[k] = v
|
|
}
|
|
for protocol, protocolStats := range netStats {
|
|
for name, value := range protocolStats {
|
|
key := protocol + "_" + name
|
|
v, err := strconv.ParseFloat(value, 64)
|
|
if err != nil {
|
|
return fmt.Errorf("invalid value %s in netstats: %w", value, err)
|
|
}
|
|
if !c.fieldPattern.MatchString(key) {
|
|
continue
|
|
}
|
|
ch <- prometheus.MustNewConstMetric(
|
|
prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, netStatsSubsystem, key),
|
|
fmt.Sprintf("Statistic %s.", protocol+name),
|
|
nil, nil,
|
|
),
|
|
prometheus.UntypedValue, v,
|
|
)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func getNetStats(fileName string) (map[string]map[string]string, error) {
|
|
file, err := os.Open(fileName)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer file.Close()
|
|
|
|
return parseNetStats(file, fileName)
|
|
}
|
|
|
|
func parseNetStats(r io.Reader, fileName string) (map[string]map[string]string, error) {
|
|
var (
|
|
netStats = map[string]map[string]string{}
|
|
scanner = bufio.NewScanner(r)
|
|
)
|
|
|
|
for scanner.Scan() {
|
|
nameParts := strings.Split(scanner.Text(), " ")
|
|
scanner.Scan()
|
|
valueParts := strings.Split(scanner.Text(), " ")
|
|
// Remove trailing :.
|
|
protocol := nameParts[0][:len(nameParts[0])-1]
|
|
netStats[protocol] = map[string]string{}
|
|
if len(nameParts) != len(valueParts) {
|
|
return nil, fmt.Errorf("mismatch field count mismatch in %s: %s",
|
|
fileName, protocol)
|
|
}
|
|
for i := 1; i < len(nameParts); i++ {
|
|
netStats[protocol][nameParts[i]] = valueParts[i]
|
|
}
|
|
}
|
|
|
|
return netStats, scanner.Err()
|
|
}
|
|
|
|
func getSNMP6Stats(fileName string) (map[string]map[string]string, error) {
|
|
file, err := os.Open(fileName)
|
|
if err != nil {
|
|
// On systems with IPv6 disabled, this file won't exist.
|
|
// Do nothing.
|
|
if errors.Is(err, os.ErrNotExist) {
|
|
return nil, nil
|
|
}
|
|
|
|
return nil, err
|
|
}
|
|
defer file.Close()
|
|
|
|
return parseSNMP6Stats(file)
|
|
}
|
|
|
|
func parseSNMP6Stats(r io.Reader) (map[string]map[string]string, error) {
|
|
var (
|
|
netStats = map[string]map[string]string{}
|
|
scanner = bufio.NewScanner(r)
|
|
)
|
|
|
|
for scanner.Scan() {
|
|
stat := strings.Fields(scanner.Text())
|
|
if len(stat) < 2 {
|
|
continue
|
|
}
|
|
// Expect to have "6" in metric name, skip line otherwise
|
|
if sixIndex := strings.Index(stat[0], "6"); sixIndex != -1 {
|
|
protocol := stat[0][:sixIndex+1]
|
|
name := stat[0][sixIndex+1:]
|
|
if _, present := netStats[protocol]; !present {
|
|
netStats[protocol] = map[string]string{}
|
|
}
|
|
netStats[protocol][name] = stat[1]
|
|
}
|
|
}
|
|
|
|
return netStats, scanner.Err()
|
|
}
|