From a696830c38f8dcb171d36651d0b10fe58cf28d4c Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Tue, 29 Nov 2016 14:32:52 +0100 Subject: [PATCH] Add a collector for NFS client statistics. This change adds a new collector called "nfs" that parses the contents of /proc/net/rpc/nfs and turns it into metrics. It can be used to inspect the number of operations per type, but also to keep an eye on an extraneous number of retransmissions, which may indicate connectivity issues. I've picked the name "nfs", as most operating systems use "nfs" for the client component and "nfsd" as the server component. If we want to add stats for the NFS server as well, we'd better call such a collector "nfsd". --- README.md | 1 + collector/fixtures/e2e-output.txt | 106 +++++++++++++++++ collector/fixtures/proc/net/rpc/nfs | 5 + collector/nfs_linux.go | 176 ++++++++++++++++++++++++++++ end-to-end-test.sh | 1 + 5 files changed, 289 insertions(+) create mode 100644 collector/fixtures/proc/net/rpc/nfs create mode 100644 collector/nfs_linux.go diff --git a/README.md b/README.md index a1037dcf..507851c1 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,7 @@ ksmd | Exposes kernel and system statistics from `/sys/kernel/mm/ksm`. | Linux logind | Exposes session counts from [logind](http://www.freedesktop.org/wiki/Software/systemd/logind/). | Linux megacli | Exposes RAID statistics from MegaCLI. | Linux meminfo_numa | Exposes memory statistics from `/proc/meminfo_numa`. | Linux +nfs | Exposes NFS client statistics from `/proc/net/rpc/nfs`. This is the same information as `nfsstat -c`. | Linux ntp | Exposes time drift from an NTP server. | _any_ runit | Exposes service status from [runit](http://smarden.org/runit/). | _any_ supervisord | Exposes service status from [supervisord](http://supervisord.org/). 
| _any_ diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index d3bfc6c1..baf8f972 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -1743,6 +1743,112 @@ node_nf_conntrack_entries 123 # HELP node_nf_conntrack_entries_limit Maximum size of connection tracking table. # TYPE node_nf_conntrack_entries_limit gauge node_nf_conntrack_entries_limit 65536 +# HELP node_nfs_net_connections Number of connections at the network layer. +# TYPE node_nfs_net_connections counter +node_nfs_net_connections{protocol="tcp"} 45 +# HELP node_nfs_net_reads Number of reads at the network layer. +# TYPE node_nfs_net_reads counter +node_nfs_net_reads{protocol="tcp"} 69 +node_nfs_net_reads{protocol="udp"} 70 +# HELP node_nfs_procedures Number of NFS procedures invoked. +# TYPE node_nfs_procedures counter +node_nfs_procedures{procedure="access",version="3"} 1.17661341e+08 +node_nfs_procedures{procedure="access",version="4"} 58 +node_nfs_procedures{procedure="close",version="4"} 28 +node_nfs_procedures{procedure="commit",version="3"} 23729 +node_nfs_procedures{procedure="commit",version="4"} 83 +node_nfs_procedures{procedure="create",version="2"} 52 +node_nfs_procedures{procedure="create",version="3"} 2.993289e+06 +node_nfs_procedures{procedure="create",version="4"} 15 +node_nfs_procedures{procedure="create_session",version="4"} 32 +node_nfs_procedures{procedure="delegreturn",version="4"} 97 +node_nfs_procedures{procedure="destroy_session",version="4"} 67 +node_nfs_procedures{procedure="exchange_id",version="4"} 58 +node_nfs_procedures{procedure="fs_locations",version="4"} 32 +node_nfs_procedures{procedure="fsid_present",version="4"} 11 +node_nfs_procedures{procedure="fsinfo",version="3"} 2 +node_nfs_procedures{procedure="fsinfo",version="4"} 68 +node_nfs_procedures{procedure="fsstat",version="3"} 13332 +node_nfs_procedures{procedure="get_lease_time",version="4"} 28 +node_nfs_procedures{procedure="getacl",version="4"} 36 
+node_nfs_procedures{procedure="getattr",version="2"} 57 +node_nfs_procedures{procedure="getattr",version="3"} 1.061909262e+09 +node_nfs_procedures{procedure="getattr",version="4"} 88 +node_nfs_procedures{procedure="getdeviceinfo",version="4"} 1 +node_nfs_procedures{procedure="layoutcommit",version="4"} 26 +node_nfs_procedures{procedure="layoutget",version="4"} 90 +node_nfs_procedures{procedure="layoutreturn",version="4"} 0 +node_nfs_procedures{procedure="link",version="2"} 17 +node_nfs_procedures{procedure="link",version="3"} 0 +node_nfs_procedures{procedure="link",version="4"} 21 +node_nfs_procedures{procedure="lock",version="4"} 39 +node_nfs_procedures{procedure="lockt",version="4"} 68 +node_nfs_procedures{procedure="locku",version="4"} 59 +node_nfs_procedures{procedure="lookup",version="2"} 71 +node_nfs_procedures{procedure="lookup",version="3"} 4.077635e+06 +node_nfs_procedures{procedure="lookup",version="4"} 29 +node_nfs_procedures{procedure="lookup_root",version="4"} 74 +node_nfs_procedures{procedure="mkdir",version="2"} 50 +node_nfs_procedures{procedure="mkdir",version="3"} 590 +node_nfs_procedures{procedure="mknod",version="3"} 0 +node_nfs_procedures{procedure="null",version="2"} 16 +node_nfs_procedures{procedure="null",version="3"} 0 +node_nfs_procedures{procedure="null",version="4"} 98 +node_nfs_procedures{procedure="open",version="4"} 85 +node_nfs_procedures{procedure="open_confirm",version="4"} 23 +node_nfs_procedures{procedure="open_downgrade",version="4"} 1 +node_nfs_procedures{procedure="open_noattr",version="4"} 24 +node_nfs_procedures{procedure="pathconf",version="3"} 1 +node_nfs_procedures{procedure="pathconf",version="4"} 53 +node_nfs_procedures{procedure="read",version="2"} 45 +node_nfs_procedures{procedure="read",version="3"} 2.9391916e+07 +node_nfs_procedures{procedure="read",version="4"} 51 +node_nfs_procedures{procedure="readdir",version="2"} 70 +node_nfs_procedures{procedure="readdir",version="3"} 3983 
+node_nfs_procedures{procedure="readdir",version="4"} 66 +node_nfs_procedures{procedure="readdirplus",version="3"} 92385 +node_nfs_procedures{procedure="readlink",version="2"} 73 +node_nfs_procedures{procedure="readlink",version="3"} 5 +node_nfs_procedures{procedure="readlink",version="4"} 54 +node_nfs_procedures{procedure="reclaim_complete",version="4"} 35 +node_nfs_procedures{procedure="release_lockowner",version="4"} 85 +node_nfs_procedures{procedure="remove",version="2"} 83 +node_nfs_procedures{procedure="remove",version="3"} 7815 +node_nfs_procedures{procedure="remove",version="4"} 69 +node_nfs_procedures{procedure="rename",version="2"} 61 +node_nfs_procedures{procedure="rename",version="3"} 1130 +node_nfs_procedures{procedure="rename",version="4"} 96 +node_nfs_procedures{procedure="renew",version="4"} 83 +node_nfs_procedures{procedure="rmdir",version="2"} 23 +node_nfs_procedures{procedure="rmdir",version="3"} 15 +node_nfs_procedures{procedure="root",version="2"} 52 +node_nfs_procedures{procedure="secinfo",version="4"} 81 +node_nfs_procedures{procedure="sequence",version="4"} 13 +node_nfs_procedures{procedure="server_caps",version="4"} 56 +node_nfs_procedures{procedure="setacl",version="4"} 49 +node_nfs_procedures{procedure="setattr",version="2"} 74 +node_nfs_procedures{procedure="setattr",version="3"} 48906 +node_nfs_procedures{procedure="setattr",version="4"} 73 +node_nfs_procedures{procedure="setclientid",version="4"} 12 +node_nfs_procedures{procedure="setclientid_confirm",version="4"} 84 +node_nfs_procedures{procedure="statfs",version="2"} 82 +node_nfs_procedures{procedure="statfs",version="4"} 86 +node_nfs_procedures{procedure="symlink",version="2"} 53 +node_nfs_procedures{procedure="symlink",version="3"} 0 +node_nfs_procedures{procedure="symlink",version="4"} 84 +node_nfs_procedures{procedure="write",version="2"} 0 +node_nfs_procedures{procedure="write",version="3"} 2.570425e+06 +node_nfs_procedures{procedure="write",version="4"} 54 
+node_nfs_procedures{procedure="writecache",version="2"} 86
+# HELP node_nfs_rpc_authentication_refreshes Number of RPC authentication refreshes performed.
+# TYPE node_nfs_rpc_authentication_refreshes counter
+node_nfs_rpc_authentication_refreshes 1.218815394e+09
+# HELP node_nfs_rpc_operations Number of RPCs performed.
+# TYPE node_nfs_rpc_operations counter
+node_nfs_rpc_operations 1.218785755e+09
+# HELP node_nfs_rpc_retransmissions Number of RPC retransmissions performed.
+# TYPE node_nfs_rpc_retransmissions counter
+node_nfs_rpc_retransmissions 374636
 # HELP node_procs_blocked Number of processes blocked waiting for I/O to complete.
 # TYPE node_procs_blocked gauge
 node_procs_blocked 0
diff --git a/collector/fixtures/proc/net/rpc/nfs b/collector/fixtures/proc/net/rpc/nfs
new file mode 100644
index 00000000..ba2efd3d
--- /dev/null
+++ b/collector/fixtures/proc/net/rpc/nfs
@@ -0,0 +1,5 @@
+net 70 70 69 45
+rpc 1218785755 374636 1218815394
+proc2 18 16 57 74 52 71 73 45 86 0 52 83 61 17 53 50 23 70 82
+proc3 22 0 1061909262 48906 4077635 117661341 5 29391916 2570425 2993289 590 0 0 7815 15 1130 0 3983 92385 13332 2 1 23729
+proc4 48 98 51 54 83 85 23 24 1 28 73 68 83 12 84 39 68 59 58 88 29 74 69 96 21 84 15 53 86 54 66 56 97 36 49 32 85 81 11 58 32 67 13 28 35 90 1 26 0
diff --git a/collector/nfs_linux.go b/collector/nfs_linux.go
new file mode 100644
index 00000000..70083d32
--- /dev/null
+++ b/collector/nfs_linux.go
@@ -0,0 +1,210 @@
+// Copyright 2016 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package collector
+
+import (
+	"errors"
+	"io/ioutil"
+	"os"
+	"regexp"
+	"strconv"
+	"strings"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/common/log"
+)
+
+var (
+	// Patterns for the line types found in the NFS client statistics file.
+	// The leading number of a "net" line and the number directly after a
+	// "procN" tag are matched but not captured.
+	netLineRE  = regexp.MustCompile(`^net \d+ (\d+) (\d+) (\d+)$`)
+	rpcLineRE  = regexp.MustCompile(`^rpc (\d+) (\d+) (\d+)$`)
+	procLineRE = regexp.MustCompile(`^proc(\d+) \d+ (\d+( \d+)*)$`)
+
+	// nfsProcedures maps an NFS version, as captured from a "procN" line,
+	// to its procedure names. The name at index i labels the i-th counter
+	// captured from that line.
+	nfsProcedures = map[string][]string{
+		"2": []string{
+			"null", "getattr", "setattr", "root", "lookup",
+			"readlink", "read", "writecache", "write", "create",
+			"remove", "rename", "link", "symlink", "mkdir",
+			"rmdir", "readdir", "statfs",
+		},
+		"3": []string{
+			"null", "getattr", "setattr", "lookup", "access",
+			"readlink", "read", "write", "create", "mkdir",
+			"symlink", "mknod", "remove", "rmdir", "rename",
+			"link", "readdir", "readdirplus", "fsstat", "fsinfo",
+			"pathconf", "commit",
+		},
+		"4": []string{
+			"null", "read", "write", "commit", "open",
+			"open_confirm", "open_noattr", "open_downgrade",
+			"close", "setattr", "fsinfo", "renew", "setclientid",
+			"setclientid_confirm", "lock", "lockt", "locku",
+			"access", "getattr", "lookup", "lookup_root", "remove",
+			"rename", "link", "symlink", "create", "pathconf",
+			"statfs", "readlink", "readdir", "server_caps",
+			"delegreturn", "getacl", "setacl", "fs_locations",
+			"release_lockowner", "secinfo", "fsid_present",
+			"exchange_id", "create_session", "destroy_session",
+			"sequence", "get_lease_time", "reclaim_complete",
+			"layoutget", "getdeviceinfo", "layoutcommit",
+			"layoutreturn", "secinfo_no_name", "test_stateid",
+			"free_stateid", "getdevicelist",
+			"bind_conn_to_session", "destroy_clientid", "seek",
+			"allocate", "deallocate", "layoutstats", "clone",
+			"copy",
+		},
+	}
+
+	nfsNetReadsDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(Namespace, "nfs", "net_reads"),
+		"Number of reads at the network layer.",
+		[]string{"protocol"},
+		nil,
+	)
+	nfsNetConnectionsDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(Namespace, "nfs", "net_connections"),
+		"Number of connections at the network layer.",
+		[]string{"protocol"},
+		nil,
+	)
+
+	nfsRpcOperationsDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(Namespace, "nfs", "rpc_operations"),
+		"Number of RPCs performed.",
+		nil,
+		nil,
+	)
+	nfsRpcRetransmissionsDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(Namespace, "nfs", "rpc_retransmissions"),
+		"Number of RPC retransmissions performed.",
+		nil,
+		nil,
+	)
+	nfsRpcAuthenticationRefreshesDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(Namespace, "nfs", "rpc_authentication_refreshes"),
+		"Number of RPC authentication refreshes performed.",
+		nil,
+		nil,
+	)
+
+	nfsProceduresDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(Namespace, "nfs", "procedures"),
+		"Number of NFS procedures invoked.",
+		[]string{"version", "procedure"},
+		nil,
+	)
+)
+
+// nfsCollector exposes the counters found in the NFS client statistics file.
+type nfsCollector struct{}
+
+func init() {
+	Factories["nfs"] = NewNfsCollector
+}
+
+// NewNfsCollector returns a Collector that exposes NFS client statistics.
+func NewNfsCollector() (Collector, error) {
+	return &nfsCollector{}, nil
+}
+
+// Update reads the file named by procFilePath("net/rpc/nfs") and sends one
+// counter per recognised value to ch. If the file does not exist, nothing is
+// sent and nil is returned. A non-empty line that matches no known format
+// aborts the update with an error; metrics sent before that stay sent.
+func (c *nfsCollector) Update(ch chan<- prometheus.Metric) (err error) {
+	statsFile := procFilePath("net/rpc/nfs")
+	content, err := ioutil.ReadFile(statsFile)
+	if err != nil {
+		if os.IsNotExist(err) {
+			log.Debugf("Not collecting NFS statistics, as %s does not exist: %s", statsFile, err)
+			return nil
+		}
+		return err
+	}
+
+	for _, line := range strings.Split(string(content), "\n") {
+		if line == "" {
+			continue
+		}
+		if err := updateNfsLine(ch, line); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// updateNfsLine sends the counters of a single non-empty statistics line to
+// ch, or returns an error if the line has no known format.
+func updateNfsLine(ch chan<- prometheus.Metric, line string) error {
+	if fields := netLineRE.FindStringSubmatch(line); fields != nil {
+		if err := sendNfsCounter(ch, nfsNetReadsDesc, fields[1], "udp"); err != nil {
+			return err
+		}
+		if err := sendNfsCounter(ch, nfsNetReadsDesc, fields[2], "tcp"); err != nil {
+			return err
+		}
+		return sendNfsCounter(ch, nfsNetConnectionsDesc, fields[3], "tcp")
+	}
+
+	if fields := rpcLineRE.FindStringSubmatch(line); fields != nil {
+		if err := sendNfsCounter(ch, nfsRpcOperationsDesc, fields[1]); err != nil {
+			return err
+		}
+		if err := sendNfsCounter(ch, nfsRpcRetransmissionsDesc, fields[2]); err != nil {
+			return err
+		}
+		return sendNfsCounter(ch, nfsRpcAuthenticationRefreshesDesc, fields[3])
+	}
+
+	if fields := procLineRE.FindStringSubmatch(line); fields != nil {
+		version := fields[1]
+		names, known := nfsProcedures[version]
+		if !known {
+			// Indexing the missing entry would panic; skip the line.
+			log.Debugf("Ignoring statistics for unknown NFS version %s", version)
+			return nil
+		}
+		counts := strings.Split(fields[2], " ")
+		if len(counts) > len(names) {
+			// More counters than known names: drop the unnamed tail
+			// instead of indexing past the end of names.
+			log.Debugf("Ignoring %d unknown NFSv%s procedures", len(counts)-len(names), version)
+			counts = counts[:len(names)]
+		}
+		for i, count := range counts {
+			if err := sendNfsCounter(ch, nfsProceduresDesc, count, version, names[i]); err != nil {
+				return err
+			}
+		}
+		return nil
+	}
+
+	return errors.New("Failed to parse line: " + line)
+}
+
+// sendNfsCounter parses raw as a counter value and sends it to ch as a
+// constant metric for desc with the given label values.
+func sendNfsCounter(ch chan<- prometheus.Metric, desc *prometheus.Desc, raw string, labels ...string) error {
+	value, err := strconv.ParseFloat(raw, 64)
+	if err != nil {
+		return errors.New("Failed to parse NFS counter: " + err.Error())
+	}
+	ch <- prometheus.MustNewConstMetric(desc, prometheus.CounterValue, value, labels...)
+	return nil
+}
diff --git a/end-to-end-test.sh b/end-to-end-test.sh
index 4e4ba412..ce6351e9 100755
--- a/end-to-end-test.sh
+++ b/end-to-end-test.sh
@@ -15,6 +15,7 @@ collectors=$(cat << COLLECTORS
   meminfo_numa
   netdev
   netstat
+  nfs
   sockstat
   stat
   textfile