Add a collector for NFS client statistics.

This change adds a new collector called "nfs" that parses the contents
of /proc/net/rpc/nfs and turns it into metrics. It can be used to
inspect the number of operations per type, but also to keep an eye on an
excessive number of retransmissions, which may indicate connectivity
issues.

I've picked the name "nfs", as most operating systems use "nfs" for the
client component and "nfsd" as the server component. If we want to add
stats for the NFS server as well, we'd better call such a collector
"nfsd".
This commit is contained in:
Ed Schouten 2016-11-29 14:32:52 +01:00 committed by Ed Schouten
parent 006d1c7922
commit a696830c38
5 changed files with 289 additions and 0 deletions

View file

@ -49,6 +49,7 @@ ksmd | Exposes kernel and system statistics from `/sys/kernel/mm/ksm`. | Linux
logind | Exposes session counts from [logind](http://www.freedesktop.org/wiki/Software/systemd/logind/). | Linux
megacli | Exposes RAID statistics from MegaCLI. | Linux
meminfo_numa | Exposes memory statistics from `/proc/meminfo_numa`. | Linux
nfs | Exposes NFS client statistics from `/proc/net/rpc/nfs`. This is the same information as `nfsstat -c`. | Linux
ntp | Exposes time drift from an NTP server. | _any_
runit | Exposes service status from [runit](http://smarden.org/runit/). | _any_
supervisord | Exposes service status from [supervisord](http://supervisord.org/). | _any_

View file

@ -1743,6 +1743,112 @@ node_nf_conntrack_entries 123
# HELP node_nf_conntrack_entries_limit Maximum size of connection tracking table.
# TYPE node_nf_conntrack_entries_limit gauge
node_nf_conntrack_entries_limit 65536
# HELP node_nfs_net_connections Number of connections at the network layer.
# TYPE node_nfs_net_connections counter
node_nfs_net_connections{protocol="tcp"} 45
# HELP node_nfs_net_reads Number of reads at the network layer.
# TYPE node_nfs_net_reads counter
node_nfs_net_reads{protocol="tcp"} 69
node_nfs_net_reads{protocol="udp"} 70
# HELP node_nfs_procedures Number of NFS procedures invoked.
# TYPE node_nfs_procedures counter
node_nfs_procedures{procedure="access",version="3"} 1.17661341e+08
node_nfs_procedures{procedure="access",version="4"} 58
node_nfs_procedures{procedure="close",version="4"} 28
node_nfs_procedures{procedure="commit",version="3"} 23729
node_nfs_procedures{procedure="commit",version="4"} 83
node_nfs_procedures{procedure="create",version="2"} 52
node_nfs_procedures{procedure="create",version="3"} 2.993289e+06
node_nfs_procedures{procedure="create",version="4"} 15
node_nfs_procedures{procedure="create_session",version="4"} 32
node_nfs_procedures{procedure="delegreturn",version="4"} 97
node_nfs_procedures{procedure="destroy_session",version="4"} 67
node_nfs_procedures{procedure="exchange_id",version="4"} 58
node_nfs_procedures{procedure="fs_locations",version="4"} 32
node_nfs_procedures{procedure="fsid_present",version="4"} 11
node_nfs_procedures{procedure="fsinfo",version="3"} 2
node_nfs_procedures{procedure="fsinfo",version="4"} 68
node_nfs_procedures{procedure="fsstat",version="3"} 13332
node_nfs_procedures{procedure="get_lease_time",version="4"} 28
node_nfs_procedures{procedure="getacl",version="4"} 36
node_nfs_procedures{procedure="getattr",version="2"} 57
node_nfs_procedures{procedure="getattr",version="3"} 1.061909262e+09
node_nfs_procedures{procedure="getattr",version="4"} 88
node_nfs_procedures{procedure="getdeviceinfo",version="4"} 1
node_nfs_procedures{procedure="layoutcommit",version="4"} 26
node_nfs_procedures{procedure="layoutget",version="4"} 90
node_nfs_procedures{procedure="layoutreturn",version="4"} 0
node_nfs_procedures{procedure="link",version="2"} 17
node_nfs_procedures{procedure="link",version="3"} 0
node_nfs_procedures{procedure="link",version="4"} 21
node_nfs_procedures{procedure="lock",version="4"} 39
node_nfs_procedures{procedure="lockt",version="4"} 68
node_nfs_procedures{procedure="locku",version="4"} 59
node_nfs_procedures{procedure="lookup",version="2"} 71
node_nfs_procedures{procedure="lookup",version="3"} 4.077635e+06
node_nfs_procedures{procedure="lookup",version="4"} 29
node_nfs_procedures{procedure="lookup_root",version="4"} 74
node_nfs_procedures{procedure="mkdir",version="2"} 50
node_nfs_procedures{procedure="mkdir",version="3"} 590
node_nfs_procedures{procedure="mknod",version="3"} 0
node_nfs_procedures{procedure="null",version="2"} 16
node_nfs_procedures{procedure="null",version="3"} 0
node_nfs_procedures{procedure="null",version="4"} 98
node_nfs_procedures{procedure="open",version="4"} 85
node_nfs_procedures{procedure="open_confirm",version="4"} 23
node_nfs_procedures{procedure="open_downgrade",version="4"} 1
node_nfs_procedures{procedure="open_noattr",version="4"} 24
node_nfs_procedures{procedure="pathconf",version="3"} 1
node_nfs_procedures{procedure="pathconf",version="4"} 53
node_nfs_procedures{procedure="read",version="2"} 45
node_nfs_procedures{procedure="read",version="3"} 2.9391916e+07
node_nfs_procedures{procedure="read",version="4"} 51
node_nfs_procedures{procedure="readdir",version="2"} 70
node_nfs_procedures{procedure="readdir",version="3"} 3983
node_nfs_procedures{procedure="readdir",version="4"} 66
node_nfs_procedures{procedure="readdirplus",version="3"} 92385
node_nfs_procedures{procedure="readlink",version="2"} 73
node_nfs_procedures{procedure="readlink",version="3"} 5
node_nfs_procedures{procedure="readlink",version="4"} 54
node_nfs_procedures{procedure="reclaim_complete",version="4"} 35
node_nfs_procedures{procedure="release_lockowner",version="4"} 85
node_nfs_procedures{procedure="remove",version="2"} 83
node_nfs_procedures{procedure="remove",version="3"} 7815
node_nfs_procedures{procedure="remove",version="4"} 69
node_nfs_procedures{procedure="rename",version="2"} 61
node_nfs_procedures{procedure="rename",version="3"} 1130
node_nfs_procedures{procedure="rename",version="4"} 96
node_nfs_procedures{procedure="renew",version="4"} 83
node_nfs_procedures{procedure="rmdir",version="2"} 23
node_nfs_procedures{procedure="rmdir",version="3"} 15
node_nfs_procedures{procedure="root",version="2"} 52
node_nfs_procedures{procedure="secinfo",version="4"} 81
node_nfs_procedures{procedure="sequence",version="4"} 13
node_nfs_procedures{procedure="server_caps",version="4"} 56
node_nfs_procedures{procedure="setacl",version="4"} 49
node_nfs_procedures{procedure="setattr",version="2"} 74
node_nfs_procedures{procedure="setattr",version="3"} 48906
node_nfs_procedures{procedure="setattr",version="4"} 73
node_nfs_procedures{procedure="setclientid",version="4"} 12
node_nfs_procedures{procedure="setclientid_confirm",version="4"} 84
node_nfs_procedures{procedure="statfs",version="2"} 82
node_nfs_procedures{procedure="statfs",version="4"} 86
node_nfs_procedures{procedure="symlink",version="2"} 53
node_nfs_procedures{procedure="symlink",version="3"} 0
node_nfs_procedures{procedure="symlink",version="4"} 84
node_nfs_procedures{procedure="write",version="2"} 0
node_nfs_procedures{procedure="write",version="3"} 2.570425e+06
node_nfs_procedures{procedure="write",version="4"} 54
node_nfs_procedures{procedure="writecache",version="2"} 86
# HELP node_nfs_rpc_authentication_refreshes Number of RPC authentication refreshes performed.
# TYPE node_nfs_rpc_authentication_refreshes counter
node_nfs_rpc_authentication_refreshes 1.218815394e+09
# HELP node_nfs_rpc_operations Number of RPCs performed.
# TYPE node_nfs_rpc_operations counter
node_nfs_rpc_operations 1.218785755e+09
# HELP node_nfs_rpc_retransmissions Number of RPC transmissions performed.
# TYPE node_nfs_rpc_retransmissions counter
node_nfs_rpc_retransmissions 374636
# HELP node_procs_blocked Number of processes blocked waiting for I/O to complete.
# TYPE node_procs_blocked gauge
node_procs_blocked 0

View file

@ -0,0 +1,5 @@
net 70 70 69 45
rpc 1218785755 374636 1218815394
proc2 18 16 57 74 52 71 73 45 86 0 52 83 61 17 53 50 23 70 82
proc3 22 0 1061909262 48906 4077635 117661341 5 29391916 2570425 2993289 590 0 0 7815 15 1130 0 3983 92385 13332 2 1 23729
proc4 48 98 51 54 83 85 23 24 1 28 73 68 83 12 84 39 68 59 58 88 29 74 69 96 21 84 15 53 86 54 66 56 97 36 49 32 85 81 11 58 32 67 13 28 35 90 1 26 0

176
collector/nfs_linux.go Normal file
View file

@ -0,0 +1,176 @@
// Copyright 2016 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"errors"
"io/ioutil"
"os"
"regexp"
"strconv"
"strings"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
)
// Patterns for the lines of the NFS client statistics file, the procedure
// name tables used to label per-procedure counters, and the descriptors of
// every metric emitted by nfsCollector.Update.
var (
// netLineRE matches the "net" line. The first number is skipped; the three
// captured numbers are reported by Update as UDP reads, TCP reads and TCP
// connections, in that order.
netLineRE = regexp.MustCompile(`^net \d+ (\d+) (\d+) (\d+)$`)
// rpcLineRE matches the "rpc" line. The three captured numbers are reported
// by Update as operations, retransmissions and authentication refreshes.
rpcLineRE = regexp.MustCompile(`^rpc (\d+) (\d+) (\d+)$`)
// procLineRE matches a "procN" line. Group 1 is the NFS version N, group 2
// is the space-separated list of per-procedure counters. The first number
// on the line is skipped (in the test fixture it equals the number of
// counters that follow — presumably a field count; confirm against the
// kernel source).
procLineRE = regexp.MustCompile(`^proc(\d+) \d+ (\d+( \d+)*)$`)
// nfsProcedures maps an NFS version (as it appears after "proc") to the
// procedure names for that version. Update uses the position of a counter
// on the "procN" line as an index into the slice, so the order of the names
// is significant and must not be changed.
nfsProcedures = map[string][]string{
"2": []string{
"null", "getattr", "setattr", "root", "lookup",
"readlink", "read", "writecache", "write", "create",
"remove", "rename", "link", "symlink", "mkdir",
"rmdir", "readdir", "statfs",
},
"3": []string{
"null", "getattr", "setattr", "lookup", "access",
"readlink", "read", "write", "create", "mkdir",
"symlink", "mknod", "remove", "rmdir", "rename",
"link", "readdir", "readdirplus", "fsstat", "fsinfo",
"pathconf", "commit",
},
"4": []string{
"null", "read", "write", "commit", "open",
"open_confirm", "open_noattr", "open_downgrade",
"close", "setattr", "fsinfo", "renew", "setclientid",
"setclientid_confirm", "lock", "lockt", "locku",
"access", "getattr", "lookup", "lookup_root", "remove",
"rename", "link", "symlink", "create", "pathconf",
"statfs", "readlink", "readdir", "server_caps",
"delegreturn", "getacl", "setacl", "fs_locations",
"release_lockowner", "secinfo", "fsid_present",
"exchange_id", "create_session", "destroy_session",
"sequence", "get_lease_time", "reclaim_complete",
"layoutget", "getdeviceinfo", "layoutcommit",
"layoutreturn", "secinfo_no_name", "test_stateid",
"free_stateid", "getdevicelist",
"bind_conn_to_session", "destroy_clientid", "seek",
"allocate", "deallocate", "layoutstats", "clone",
"copy",
},
}
// nfsNetReadsDesc describes the network-layer read counter, labelled by
// protocol ("udp" or "tcp" in Update).
nfsNetReadsDesc = prometheus.NewDesc(
prometheus.BuildFQName(Namespace, "nfs", "net_reads"),
"Number of reads at the network layer.",
[]string{"protocol"},
nil,
)
// nfsNetConnectionsDesc describes the network-layer connection counter,
// labelled by protocol (Update only emits "tcp").
nfsNetConnectionsDesc = prometheus.NewDesc(
prometheus.BuildFQName(Namespace, "nfs", "net_connections"),
"Number of connections at the network layer.",
[]string{"protocol"},
nil,
)
// nfsRpcOperationsDesc describes the unlabelled counter of RPCs performed.
nfsRpcOperationsDesc = prometheus.NewDesc(
prometheus.BuildFQName(Namespace, "nfs", "rpc_operations"),
"Number of RPCs performed.",
nil,
nil,
)
// nfsRpcRetransmissionsDesc describes the unlabelled RPC retransmission
// counter.
// NOTE(review): the help text below says "transmissions" although the metric
// is named rpc_retransmissions; this looks like a typo. Fixing it changes the
// exposed HELP line, so any expected-output fixture containing that line
// must be updated in the same change.
nfsRpcRetransmissionsDesc = prometheus.NewDesc(
prometheus.BuildFQName(Namespace, "nfs", "rpc_retransmissions"),
"Number of RPC transmissions performed.",
nil,
nil,
)
// nfsRpcAuthenticationRefreshesDesc describes the unlabelled counter of RPC
// authentication refreshes.
nfsRpcAuthenticationRefreshesDesc = prometheus.NewDesc(
prometheus.BuildFQName(Namespace, "nfs", "rpc_authentication_refreshes"),
"Number of RPC authentication refreshes performed.",
nil,
nil,
)
// nfsProceduresDesc describes the per-procedure invocation counter. Label
// values must be supplied in the order version, procedure.
nfsProceduresDesc = prometheus.NewDesc(
prometheus.BuildFQName(Namespace, "nfs", "procedures"),
"Number of NFS procedures invoked.",
[]string{"version", "procedure"},
nil,
)
)
// nfsCollector collects NFS client statistics. It holds no state; all data
// is read from the statistics file on every Update call.
type nfsCollector struct{}
// init registers NewNfsCollector in Factories under the collector name "nfs".
func init() {
Factories["nfs"] = NewNfsCollector
}
// NewNfsCollector returns a new Collector exposing NFS client statistics.
// It performs no I/O and the returned error is always nil.
func NewNfsCollector() (Collector, error) {
return &nfsCollector{}, nil
}
// Update reads the NFS client statistics file ("net/rpc/nfs" resolved through
// procFilePath) and sends one constant counter metric per parsed value on ch.
//
// Recognised lines:
//   - "net":   UDP reads, TCP reads and TCP connections.
//   - "rpc":   operations, retransmissions and authentication refreshes.
//   - "procN": one counter per procedure of NFS version N, labelled with the
//     names from nfsProcedures.
//
// A missing statistics file is not an error: nothing is sent and nil is
// returned. Any other read failure is returned as is, and a non-empty line
// that matches none of the patterns aborts the update with an error. Metrics
// already sent for earlier lines are not retracted in that case.
//
// Counters for an NFS version that is not in nfsProcedures, or counters beyond
// the known procedure names of a version, are skipped with a debug message
// instead of indexing past the end of the name table.
func (c *nfsCollector) Update(ch chan<- prometheus.Metric) (err error) {
	statsFile := procFilePath("net/rpc/nfs")
	content, err := ioutil.ReadFile(statsFile)
	if err != nil {
		if os.IsNotExist(err) {
			log.Debugf("Not collecting NFS statistics, as %s does not exist", statsFile)
			return nil
		}
		return err
	}

	// parse converts a captured field to a float. The regular expressions
	// only capture runs of decimal digits, so the sole possible failure is a
	// range overflow, for which ParseFloat already returns +Inf; the error is
	// therefore deliberately ignored.
	parse := func(field string) float64 {
		value, _ := strconv.ParseFloat(field, 64)
		return value
	}

	for _, line := range strings.Split(string(content), "\n") {
		if fields := netLineRE.FindStringSubmatch(line); fields != nil {
			ch <- prometheus.MustNewConstMetric(
				nfsNetReadsDesc, prometheus.CounterValue,
				parse(fields[1]), "udp")
			ch <- prometheus.MustNewConstMetric(
				nfsNetReadsDesc, prometheus.CounterValue,
				parse(fields[2]), "tcp")
			ch <- prometheus.MustNewConstMetric(
				nfsNetConnectionsDesc, prometheus.CounterValue,
				parse(fields[3]), "tcp")
		} else if fields := rpcLineRE.FindStringSubmatch(line); fields != nil {
			ch <- prometheus.MustNewConstMetric(
				nfsRpcOperationsDesc,
				prometheus.CounterValue, parse(fields[1]))
			ch <- prometheus.MustNewConstMetric(
				nfsRpcRetransmissionsDesc,
				prometheus.CounterValue, parse(fields[2]))
			ch <- prometheus.MustNewConstMetric(
				nfsRpcAuthenticationRefreshesDesc,
				prometheus.CounterValue, parse(fields[3]))
		} else if fields := procLineRE.FindStringSubmatch(line); fields != nil {
			version := fields[1]
			names, known := nfsProcedures[version]
			if !known {
				// Without a name table there is no label to attach.
				log.Debugf("Ignoring statistics for unknown NFS version %s", version)
				continue
			}
			for i, count := range strings.Split(fields[2], " ") {
				if i >= len(names) {
					// The kernel reports more procedures than this
					// collector knows names for; drop the remainder.
					log.Debugf("Ignoring unknown NFS version %s procedures from index %d onwards", version, i)
					break
				}
				ch <- prometheus.MustNewConstMetric(
					nfsProceduresDesc,
					prometheus.CounterValue,
					parse(count),
					version,
					names[i])
			}
		} else if line != "" {
			return errors.New("Failed to parse line: " + line)
		}
	}
	return nil
}

View file

@ -15,6 +15,7 @@ collectors=$(cat << COLLECTORS
meminfo_numa
netdev
netstat
nfs
sockstat
stat
textfile