Closes issue #261 on node_exporter. (#1403)

* Closes issue #261 on node_exporter.

Delegated mdstat parsing to the procfs project. mdadm_linux.go now only exports the metrics.
-> Added a "state" disk label with values "failed", "spare", "active" to indicate disk status
-> Changed metric node_md_disks_total ==> node_md_disks_required
-> Removed test cases for mdadm_linux.go, as the functionality they covered has moved to the procfs project.

Signed-off-by: Advait Bhatwadekar <advait123@ymail.com>
Advait Bhatwadekar 2019-07-01 05:56:06 -04:00 committed by Ben Kochie
parent b4cc818347
commit 3f49b31101
7 changed files with 396 additions and 410 deletions
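In short, the collector now delegates all /proc/mdstat parsing to github.com/prometheus/procfs and only maps the parsed fields onto labeled gauges. Below is a minimal, self-contained sketch of that pattern using the field names that appear in the diff (DisksActive, DisksFailed, DisksSpare); the standalone wiring, the "/proc" path, and the channel plumbing are illustrative only, not the exporter's actual collector registration.

```go
package main

import (
	"fmt"
	"log"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/procfs"
)

// disksDesc mirrors the new node_md_disks metric: one gauge per md device,
// broken down by a "state" label ("active", "failed", "spare").
var disksDesc = prometheus.NewDesc(
	"node_md_disks",
	"Number of active/failed/spare disks of device.",
	[]string{"device", "state"},
	nil,
)

// collectMD asks procfs for the parsed mdstat entries and emits one metric
// per device and state, which is essentially what the refactored collector does.
func collectMD(ch chan<- prometheus.Metric) error {
	fs, err := procfs.NewFS("/proc") // node_exporter takes this path from --path.procfs
	if err != nil {
		return fmt.Errorf("failed to open procfs: %v", err)
	}
	mdStats, err := fs.MDStat() // parsing now lives in the procfs project
	if err != nil {
		return fmt.Errorf("error parsing mdstatus: %v", err)
	}
	for _, md := range mdStats {
		for state, n := range map[string]float64{
			"active": float64(md.DisksActive),
			"failed": float64(md.DisksFailed),
			"spare":  float64(md.DisksSpare),
		} {
			ch <- prometheus.MustNewConstMetric(disksDesc, prometheus.GaugeValue, n, md.Name, state)
		}
	}
	return nil
}

func main() {
	ch := make(chan prometheus.Metric)
	go func() {
		if err := collectMD(ch); err != nil {
			log.Println(err)
		}
		close(ch)
	}()
	// Print the descriptors just to show the metrics were produced.
	for m := range ch {
		fmt.Println(m.Desc())
	}
}
```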


@@ -4,11 +4,15 @@
* The netdev collector CLI argument `--collector.netdev.ignored-devices` was renamed to `--collector.netdev.device-blacklist` in order to conform with the systemd collector. #1279
* The label named `state` on `node_systemd_service_restart_total` metrics was changed to `name` to better describe the metric. #1393
* Refactoring of the mdadm collector changes several metrics
- `node_md_disks_active` is removed
- `node_md_disks` now has a `state` label for "failed", "spare", "active" disks.
- `node_md_is_active` is replaced by `node_md_state` with a state set of "active", "inactive", "recovering", "resync".
### Changes
* [CHANGE] Add `--collector.netdev.device-whitelist`. #1279
* [CHANGE] Refactor mdadm collector #1403
* [FEATURE]
* [ENHANCEMENT]
* [BUGFIX] Renamed label `state` to `name` on `node_systemd_service_restart_total`. #1393


@@ -1078,6 +1078,7 @@ node_load5 0.37
node_md_blocks{device="md0"} 248896
node_md_blocks{device="md00"} 4.186624e+06
node_md_blocks{device="md10"} 3.14159265e+08
node_md_blocks{device="md101"} 322560
node_md_blocks{device="md11"} 4.190208e+06
node_md_blocks{device="md12"} 3.886394368e+09
node_md_blocks{device="md120"} 2.095104e+06
@@ -1095,7 +1096,8 @@ node_md_blocks{device="md9"} 523968
node_md_blocks_synced{device="md0"} 248896
node_md_blocks_synced{device="md00"} 4.186624e+06
node_md_blocks_synced{device="md10"} 3.14159265e+08
node_md_blocks_synced{device="md11"} 4.190208e+06
node_md_blocks_synced{device="md101"} 322560
node_md_blocks_synced{device="md11"} 0
node_md_blocks_synced{device="md12"} 3.886394368e+09
node_md_blocks_synced{device="md120"} 2.095104e+06
node_md_blocks_synced{device="md126"} 1.855870976e+09
@@ -1106,58 +1108,141 @@ node_md_blocks_synced{device="md4"} 4.883648e+06
node_md_blocks_synced{device="md6"} 1.6775552e+07
node_md_blocks_synced{device="md7"} 7.813735424e+09
node_md_blocks_synced{device="md8"} 1.6775552e+07
node_md_blocks_synced{device="md9"} 523968
# HELP node_md_disks Total number of disks of device.
node_md_blocks_synced{device="md9"} 0
# HELP node_md_disks Number of active/failed/spare disks of device.
# TYPE node_md_disks gauge
node_md_disks{device="md0"} 2
node_md_disks{device="md00"} 1
node_md_disks{device="md10"} 2
node_md_disks{device="md11"} 2
node_md_disks{device="md12"} 2
node_md_disks{device="md120"} 2
node_md_disks{device="md126"} 2
node_md_disks{device="md127"} 2
node_md_disks{device="md219"} 3
node_md_disks{device="md3"} 8
node_md_disks{device="md4"} 2
node_md_disks{device="md6"} 2
node_md_disks{device="md7"} 4
node_md_disks{device="md8"} 2
node_md_disks{device="md9"} 4
# HELP node_md_disks_active Number of active disks of device.
# TYPE node_md_disks_active gauge
node_md_disks_active{device="md0"} 2
node_md_disks_active{device="md00"} 1
node_md_disks_active{device="md10"} 2
node_md_disks_active{device="md11"} 2
node_md_disks_active{device="md12"} 2
node_md_disks_active{device="md120"} 2
node_md_disks_active{device="md126"} 2
node_md_disks_active{device="md127"} 2
node_md_disks_active{device="md219"} 0
node_md_disks_active{device="md3"} 8
node_md_disks_active{device="md4"} 0
node_md_disks_active{device="md6"} 1
node_md_disks_active{device="md7"} 3
node_md_disks_active{device="md8"} 2
node_md_disks_active{device="md9"} 4
# HELP node_md_is_active Indicator whether the md-device is active or not.
# TYPE node_md_is_active gauge
node_md_is_active{device="md0"} 1
node_md_is_active{device="md00"} 1
node_md_is_active{device="md10"} 1
node_md_is_active{device="md11"} 1
node_md_is_active{device="md12"} 1
node_md_is_active{device="md120"} 1
node_md_is_active{device="md126"} 1
node_md_is_active{device="md127"} 1
node_md_is_active{device="md219"} 0
node_md_is_active{device="md3"} 1
node_md_is_active{device="md4"} 0
node_md_is_active{device="md6"} 1
node_md_is_active{device="md7"} 1
node_md_is_active{device="md8"} 1
node_md_is_active{device="md9"} 1
node_md_disks{device="md0",state="active"} 2
node_md_disks{device="md0",state="failed"} 0
node_md_disks{device="md0",state="spare"} 0
node_md_disks{device="md00",state="active"} 1
node_md_disks{device="md00",state="failed"} 0
node_md_disks{device="md00",state="spare"} 0
node_md_disks{device="md10",state="active"} 2
node_md_disks{device="md10",state="failed"} 0
node_md_disks{device="md10",state="spare"} 0
node_md_disks{device="md101",state="active"} 3
node_md_disks{device="md101",state="failed"} 0
node_md_disks{device="md101",state="spare"} 0
node_md_disks{device="md11",state="active"} 2
node_md_disks{device="md11",state="failed"} 1
node_md_disks{device="md11",state="spare"} 2
node_md_disks{device="md12",state="active"} 2
node_md_disks{device="md12",state="failed"} 0
node_md_disks{device="md12",state="spare"} 0
node_md_disks{device="md120",state="active"} 2
node_md_disks{device="md120",state="failed"} 0
node_md_disks{device="md120",state="spare"} 0
node_md_disks{device="md126",state="active"} 2
node_md_disks{device="md126",state="failed"} 0
node_md_disks{device="md126",state="spare"} 0
node_md_disks{device="md127",state="active"} 2
node_md_disks{device="md127",state="failed"} 0
node_md_disks{device="md127",state="spare"} 0
node_md_disks{device="md219",state="active"} 0
node_md_disks{device="md219",state="failed"} 0
node_md_disks{device="md219",state="spare"} 3
node_md_disks{device="md3",state="active"} 8
node_md_disks{device="md3",state="failed"} 0
node_md_disks{device="md3",state="spare"} 2
node_md_disks{device="md4",state="active"} 0
node_md_disks{device="md4",state="failed"} 1
node_md_disks{device="md4",state="spare"} 1
node_md_disks{device="md6",state="active"} 1
node_md_disks{device="md6",state="failed"} 1
node_md_disks{device="md6",state="spare"} 1
node_md_disks{device="md7",state="active"} 3
node_md_disks{device="md7",state="failed"} 1
node_md_disks{device="md7",state="spare"} 0
node_md_disks{device="md8",state="active"} 2
node_md_disks{device="md8",state="failed"} 0
node_md_disks{device="md8",state="spare"} 2
node_md_disks{device="md9",state="active"} 4
node_md_disks{device="md9",state="failed"} 2
node_md_disks{device="md9",state="spare"} 1
# HELP node_md_disks_required Total number of disks of device.
# TYPE node_md_disks_required gauge
node_md_disks_required{device="md0"} 2
node_md_disks_required{device="md00"} 1
node_md_disks_required{device="md10"} 2
node_md_disks_required{device="md101"} 3
node_md_disks_required{device="md11"} 2
node_md_disks_required{device="md12"} 2
node_md_disks_required{device="md120"} 2
node_md_disks_required{device="md126"} 2
node_md_disks_required{device="md127"} 2
node_md_disks_required{device="md219"} 0
node_md_disks_required{device="md3"} 8
node_md_disks_required{device="md4"} 0
node_md_disks_required{device="md6"} 2
node_md_disks_required{device="md7"} 4
node_md_disks_required{device="md8"} 2
node_md_disks_required{device="md9"} 4
# HELP node_md_state Indicates the state of md-device.
# TYPE node_md_state gauge
node_md_state{device="md0",state="active"} 1
node_md_state{device="md0",state="inactive"} 0
node_md_state{device="md0",state="recovering"} 0
node_md_state{device="md0",state="resync"} 0
node_md_state{device="md00",state="active"} 1
node_md_state{device="md00",state="inactive"} 0
node_md_state{device="md00",state="recovering"} 0
node_md_state{device="md00",state="resync"} 0
node_md_state{device="md10",state="active"} 1
node_md_state{device="md10",state="inactive"} 0
node_md_state{device="md10",state="recovering"} 0
node_md_state{device="md10",state="resync"} 0
node_md_state{device="md101",state="active"} 1
node_md_state{device="md101",state="inactive"} 0
node_md_state{device="md101",state="recovering"} 0
node_md_state{device="md101",state="resync"} 0
node_md_state{device="md11",state="active"} 0
node_md_state{device="md11",state="inactive"} 0
node_md_state{device="md11",state="recovering"} 0
node_md_state{device="md11",state="resync"} 1
node_md_state{device="md12",state="active"} 1
node_md_state{device="md12",state="inactive"} 0
node_md_state{device="md12",state="recovering"} 0
node_md_state{device="md12",state="resync"} 0
node_md_state{device="md120",state="active"} 1
node_md_state{device="md120",state="inactive"} 0
node_md_state{device="md120",state="recovering"} 0
node_md_state{device="md120",state="resync"} 0
node_md_state{device="md126",state="active"} 1
node_md_state{device="md126",state="inactive"} 0
node_md_state{device="md126",state="recovering"} 0
node_md_state{device="md126",state="resync"} 0
node_md_state{device="md127",state="active"} 1
node_md_state{device="md127",state="inactive"} 0
node_md_state{device="md127",state="recovering"} 0
node_md_state{device="md127",state="resync"} 0
node_md_state{device="md219",state="active"} 0
node_md_state{device="md219",state="inactive"} 1
node_md_state{device="md219",state="recovering"} 0
node_md_state{device="md219",state="resync"} 0
node_md_state{device="md3",state="active"} 1
node_md_state{device="md3",state="inactive"} 0
node_md_state{device="md3",state="recovering"} 0
node_md_state{device="md3",state="resync"} 0
node_md_state{device="md4",state="active"} 0
node_md_state{device="md4",state="inactive"} 1
node_md_state{device="md4",state="recovering"} 0
node_md_state{device="md4",state="resync"} 0
node_md_state{device="md6",state="active"} 0
node_md_state{device="md6",state="inactive"} 0
node_md_state{device="md6",state="recovering"} 1
node_md_state{device="md6",state="resync"} 0
node_md_state{device="md7",state="active"} 1
node_md_state{device="md7",state="inactive"} 0
node_md_state{device="md7",state="recovering"} 0
node_md_state{device="md7",state="resync"} 0
node_md_state{device="md8",state="active"} 0
node_md_state{device="md8",state="inactive"} 0
node_md_state{device="md8",state="recovering"} 0
node_md_state{device="md8",state="resync"} 1
node_md_state{device="md9",state="active"} 0
node_md_state{device="md9",state="inactive"} 0
node_md_state{device="md9",state="recovering"} 0
node_md_state{device="md9",state="resync"} 1
# HELP node_memory_Active_anon_bytes Memory information field Active_anon_bytes.
# TYPE node_memory_Active_anon_bytes gauge
node_memory_Active_anon_bytes 2.068484096e+09


@@ -1078,6 +1078,7 @@ node_load5 0.37
node_md_blocks{device="md0"} 248896
node_md_blocks{device="md00"} 4.186624e+06
node_md_blocks{device="md10"} 3.14159265e+08
node_md_blocks{device="md101"} 322560
node_md_blocks{device="md11"} 4.190208e+06
node_md_blocks{device="md12"} 3.886394368e+09
node_md_blocks{device="md120"} 2.095104e+06
@@ -1095,7 +1096,8 @@ node_md_blocks{device="md9"} 523968
node_md_blocks_synced{device="md0"} 248896
node_md_blocks_synced{device="md00"} 4.186624e+06
node_md_blocks_synced{device="md10"} 3.14159265e+08
node_md_blocks_synced{device="md11"} 4.190208e+06
node_md_blocks_synced{device="md101"} 322560
node_md_blocks_synced{device="md11"} 0
node_md_blocks_synced{device="md12"} 3.886394368e+09
node_md_blocks_synced{device="md120"} 2.095104e+06
node_md_blocks_synced{device="md126"} 1.855870976e+09
@@ -1106,58 +1108,141 @@ node_md_blocks_synced{device="md4"} 4.883648e+06
node_md_blocks_synced{device="md6"} 1.6775552e+07
node_md_blocks_synced{device="md7"} 7.813735424e+09
node_md_blocks_synced{device="md8"} 1.6775552e+07
node_md_blocks_synced{device="md9"} 523968
# HELP node_md_disks Total number of disks of device.
node_md_blocks_synced{device="md9"} 0
# HELP node_md_disks Number of active/failed/spare disks of device.
# TYPE node_md_disks gauge
node_md_disks{device="md0"} 2
node_md_disks{device="md00"} 1
node_md_disks{device="md10"} 2
node_md_disks{device="md11"} 2
node_md_disks{device="md12"} 2
node_md_disks{device="md120"} 2
node_md_disks{device="md126"} 2
node_md_disks{device="md127"} 2
node_md_disks{device="md219"} 3
node_md_disks{device="md3"} 8
node_md_disks{device="md4"} 2
node_md_disks{device="md6"} 2
node_md_disks{device="md7"} 4
node_md_disks{device="md8"} 2
node_md_disks{device="md9"} 4
# HELP node_md_disks_active Number of active disks of device.
# TYPE node_md_disks_active gauge
node_md_disks_active{device="md0"} 2
node_md_disks_active{device="md00"} 1
node_md_disks_active{device="md10"} 2
node_md_disks_active{device="md11"} 2
node_md_disks_active{device="md12"} 2
node_md_disks_active{device="md120"} 2
node_md_disks_active{device="md126"} 2
node_md_disks_active{device="md127"} 2
node_md_disks_active{device="md219"} 0
node_md_disks_active{device="md3"} 8
node_md_disks_active{device="md4"} 0
node_md_disks_active{device="md6"} 1
node_md_disks_active{device="md7"} 3
node_md_disks_active{device="md8"} 2
node_md_disks_active{device="md9"} 4
# HELP node_md_is_active Indicator whether the md-device is active or not.
# TYPE node_md_is_active gauge
node_md_is_active{device="md0"} 1
node_md_is_active{device="md00"} 1
node_md_is_active{device="md10"} 1
node_md_is_active{device="md11"} 1
node_md_is_active{device="md12"} 1
node_md_is_active{device="md120"} 1
node_md_is_active{device="md126"} 1
node_md_is_active{device="md127"} 1
node_md_is_active{device="md219"} 0
node_md_is_active{device="md3"} 1
node_md_is_active{device="md4"} 0
node_md_is_active{device="md6"} 1
node_md_is_active{device="md7"} 1
node_md_is_active{device="md8"} 1
node_md_is_active{device="md9"} 1
node_md_disks{device="md0",state="active"} 2
node_md_disks{device="md0",state="failed"} 0
node_md_disks{device="md0",state="spare"} 0
node_md_disks{device="md00",state="active"} 1
node_md_disks{device="md00",state="failed"} 0
node_md_disks{device="md00",state="spare"} 0
node_md_disks{device="md10",state="active"} 2
node_md_disks{device="md10",state="failed"} 0
node_md_disks{device="md10",state="spare"} 0
node_md_disks{device="md101",state="active"} 3
node_md_disks{device="md101",state="failed"} 0
node_md_disks{device="md101",state="spare"} 0
node_md_disks{device="md11",state="active"} 2
node_md_disks{device="md11",state="failed"} 1
node_md_disks{device="md11",state="spare"} 2
node_md_disks{device="md12",state="active"} 2
node_md_disks{device="md12",state="failed"} 0
node_md_disks{device="md12",state="spare"} 0
node_md_disks{device="md120",state="active"} 2
node_md_disks{device="md120",state="failed"} 0
node_md_disks{device="md120",state="spare"} 0
node_md_disks{device="md126",state="active"} 2
node_md_disks{device="md126",state="failed"} 0
node_md_disks{device="md126",state="spare"} 0
node_md_disks{device="md127",state="active"} 2
node_md_disks{device="md127",state="failed"} 0
node_md_disks{device="md127",state="spare"} 0
node_md_disks{device="md219",state="active"} 0
node_md_disks{device="md219",state="failed"} 0
node_md_disks{device="md219",state="spare"} 3
node_md_disks{device="md3",state="active"} 8
node_md_disks{device="md3",state="failed"} 0
node_md_disks{device="md3",state="spare"} 2
node_md_disks{device="md4",state="active"} 0
node_md_disks{device="md4",state="failed"} 1
node_md_disks{device="md4",state="spare"} 1
node_md_disks{device="md6",state="active"} 1
node_md_disks{device="md6",state="failed"} 1
node_md_disks{device="md6",state="spare"} 1
node_md_disks{device="md7",state="active"} 3
node_md_disks{device="md7",state="failed"} 1
node_md_disks{device="md7",state="spare"} 0
node_md_disks{device="md8",state="active"} 2
node_md_disks{device="md8",state="failed"} 0
node_md_disks{device="md8",state="spare"} 2
node_md_disks{device="md9",state="active"} 4
node_md_disks{device="md9",state="failed"} 2
node_md_disks{device="md9",state="spare"} 1
# HELP node_md_disks_required Total number of disks of device.
# TYPE node_md_disks_required gauge
node_md_disks_required{device="md0"} 2
node_md_disks_required{device="md00"} 1
node_md_disks_required{device="md10"} 2
node_md_disks_required{device="md101"} 3
node_md_disks_required{device="md11"} 2
node_md_disks_required{device="md12"} 2
node_md_disks_required{device="md120"} 2
node_md_disks_required{device="md126"} 2
node_md_disks_required{device="md127"} 2
node_md_disks_required{device="md219"} 0
node_md_disks_required{device="md3"} 8
node_md_disks_required{device="md4"} 0
node_md_disks_required{device="md6"} 2
node_md_disks_required{device="md7"} 4
node_md_disks_required{device="md8"} 2
node_md_disks_required{device="md9"} 4
# HELP node_md_state Indicates the state of md-device.
# TYPE node_md_state gauge
node_md_state{device="md0",state="active"} 1
node_md_state{device="md0",state="inactive"} 0
node_md_state{device="md0",state="recovering"} 0
node_md_state{device="md0",state="resync"} 0
node_md_state{device="md00",state="active"} 1
node_md_state{device="md00",state="inactive"} 0
node_md_state{device="md00",state="recovering"} 0
node_md_state{device="md00",state="resync"} 0
node_md_state{device="md10",state="active"} 1
node_md_state{device="md10",state="inactive"} 0
node_md_state{device="md10",state="recovering"} 0
node_md_state{device="md10",state="resync"} 0
node_md_state{device="md101",state="active"} 1
node_md_state{device="md101",state="inactive"} 0
node_md_state{device="md101",state="recovering"} 0
node_md_state{device="md101",state="resync"} 0
node_md_state{device="md11",state="active"} 0
node_md_state{device="md11",state="inactive"} 0
node_md_state{device="md11",state="recovering"} 0
node_md_state{device="md11",state="resync"} 1
node_md_state{device="md12",state="active"} 1
node_md_state{device="md12",state="inactive"} 0
node_md_state{device="md12",state="recovering"} 0
node_md_state{device="md12",state="resync"} 0
node_md_state{device="md120",state="active"} 1
node_md_state{device="md120",state="inactive"} 0
node_md_state{device="md120",state="recovering"} 0
node_md_state{device="md120",state="resync"} 0
node_md_state{device="md126",state="active"} 1
node_md_state{device="md126",state="inactive"} 0
node_md_state{device="md126",state="recovering"} 0
node_md_state{device="md126",state="resync"} 0
node_md_state{device="md127",state="active"} 1
node_md_state{device="md127",state="inactive"} 0
node_md_state{device="md127",state="recovering"} 0
node_md_state{device="md127",state="resync"} 0
node_md_state{device="md219",state="active"} 0
node_md_state{device="md219",state="inactive"} 1
node_md_state{device="md219",state="recovering"} 0
node_md_state{device="md219",state="resync"} 0
node_md_state{device="md3",state="active"} 1
node_md_state{device="md3",state="inactive"} 0
node_md_state{device="md3",state="recovering"} 0
node_md_state{device="md3",state="resync"} 0
node_md_state{device="md4",state="active"} 0
node_md_state{device="md4",state="inactive"} 1
node_md_state{device="md4",state="recovering"} 0
node_md_state{device="md4",state="resync"} 0
node_md_state{device="md6",state="active"} 0
node_md_state{device="md6",state="inactive"} 0
node_md_state{device="md6",state="recovering"} 1
node_md_state{device="md6",state="resync"} 0
node_md_state{device="md7",state="active"} 1
node_md_state{device="md7",state="inactive"} 0
node_md_state{device="md7",state="recovering"} 0
node_md_state{device="md7",state="resync"} 0
node_md_state{device="md8",state="active"} 0
node_md_state{device="md8",state="inactive"} 0
node_md_state{device="md8",state="recovering"} 0
node_md_state{device="md8",state="resync"} 1
node_md_state{device="md9",state="active"} 0
node_md_state{device="md9",state="inactive"} 0
node_md_state{device="md9",state="recovering"} 0
node_md_state{device="md9",state="resync"} 1
# HELP node_memory_Active_anon_bytes Memory information field Active_anon_bytes.
# TYPE node_memory_Active_anon_bytes gauge
node_memory_Active_anon_bytes 2.068484096e+09


@@ -1,5 +1,6 @@
Personalities : [linear] [multipath] [raid0] [raid1] [raid6] [raid5] [raid4] [raid10]
md3 : active raid6 sda1[8] sdh1[7] sdg1[6] sdf1[5] sde1[11] sdd1[3] sdc1[10] sdb1[9]
md3 : active raid6 sda1[8] sdh1[7] sdg1[6] sdf1[5] sde1[11] sdd1[3] sdc1[10] sdb1[9] sdd1[10](S) sdd2[11](S)
5853468288 blocks super 1.2 level 6, 64k chunk, algorithm 2 [8/8] [UUUUUUUU]
md127 : active raid1 sdi2[0] sdj2[1]
@@ -8,31 +9,31 @@ md127 : active raid1 sdi2[0] sdj2[1]
md0 : active raid1 sdi1[0] sdj1[1]
248896 blocks [2/2] [UU]
md4 : inactive raid1 sda3[0] sdb3[1]
md4 : inactive raid1 sda3[0](F) sdb3[1](S)
4883648 blocks [2/2] [UU]
md6 : active raid1 sdb2[2] sda2[0]
md6 : active raid1 sdb2[2](F) sdc[1](S) sda2[0]
195310144 blocks [2/1] [U_]
[=>...................] recovery = 8.5% (16775552/195310144) finish=17.0min speed=259783K/sec
md8 : active raid1 sdb1[1] sda1[0]
195310144 blocks [2/2] [UU]
[=>...................] resync = 8.5% (16775552/195310144) finish=17.0min speed=259783K/sec
md8 : active raid1 sdb1[1] sda1[0] sdc[2](S) sde[3](S)
195310144 blocks [2/2] [UU]
[=>...................] resync = 8.5% (16775552/195310144) finish=17.0min speed=259783K/sec
md7 : active raid6 sdb1[0] sde1[3] sdd1[2] sdc1[1]
md7 : active raid6 sdb1[0] sde1[3] sdd1[2] sdc1[1](F)
7813735424 blocks super 1.2 level 6, 512k chunk, algorithm 2 [4/3] [U_UU]
bitmap: 0/30 pages [0KB], 65536KB chunk
md9 : active raid1 sdc2[2] sdd2[3] sdb2[1] sda2[0]
md9 : active raid1 sdc2[2] sdd2[3] sdb2[1] sda2[0] sde[4](F) sdf[5](F) sdg[6](S)
523968 blocks super 1.2 [4/4] [UUUU]
resync=DELAYED
resync=DELAYED
md10 : active raid0 sda1[0] sdb1[1]
314159265 blocks 64k chunks
314159265 blocks 64k chunks
md11 : active (auto-read-only) raid1 sdb2[0] sdc2[1]
md11 : active (auto-read-only) raid1 sdb2[0] sdc2[1] sdc3[2](F) hda[4](S) ssdc2[3](S)
4190208 blocks super 1.2 [2/2] [UU]
resync=PENDING
resync=PENDING
md12 : active raid0 sdc2[0] sdd2[1]
3886394368 blocks super 1.2 512k chunks
@@ -41,12 +42,15 @@ md126 : active raid0 sdb[1] sdc[0]
1855870976 blocks super external:/md127/0 128k chunks
md219 : inactive sdb[2](S) sdc[1](S) sda[0](S)
7932 blocks super external:imsm
7932 blocks super external:imsm
md00 : active raid0 xvdb[0]
4186624 blocks super 1.2 256k chunks
md120 : active linear sda1[1] sdb1[0]
2095104 blocks super 1.2 0k rounding
2095104 blocks super 1.2 0k rounding
md101 : active (read-only) raid0 sdb[2] sdd[1] sdc[0]
322560 blocks super 1.2 512k chunks
unused devices: <none>
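The fixture above exercises every state the new node_md_state metric reports: md4 and md219 are inactive, md6 is recovering, md8/md9/md11 are resyncing, and the rest are active. A minimal sketch of reading such a fixture through procfs and printing the fields the e2e output asserts; the "fixtures/proc" path is assumed here for illustration and should be adjusted to wherever this fixture is checked out.

```go
package main

import (
	"fmt"
	"log"

	"github.com/prometheus/procfs"
)

func main() {
	// Point procfs at the fixture tree instead of the real /proc.
	fs, err := procfs.NewFS("fixtures/proc")
	if err != nil {
		log.Fatal(err)
	}
	mdStats, err := fs.MDStat()
	if err != nil {
		log.Fatal(err)
	}
	for _, md := range mdStats {
		// ActivityState drives node_md_state{state="..."}; the collector maps
		// "resyncing" onto the exported "resync" label value.
		fmt.Printf("%s: state=%s required=%d blocks=%d synced=%d\n",
			md.Name, md.ActivityState, md.DisksTotal, md.BlocksTotal, md.BlocksSynced)
	}
}
```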


@@ -1,5 +0,0 @@
Personalities : [invalid]
md3 : invalid
314159265 blocks 64k chunks
unused devices: <none>


@@ -17,229 +17,59 @@ package collector
import (
"fmt"
"io/ioutil"
"os"
"regexp"
"strconv"
"strings"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
"github.com/prometheus/procfs"
)
var (
statuslineRE = regexp.MustCompile(`(\d+) blocks .*\[(\d+)/(\d+)\] \[[U_]+\]`)
raid0lineRE = regexp.MustCompile(`(\d+) blocks .*\d+k (chunks|rounding)`)
buildlineRE = regexp.MustCompile(`\((\d+)/\d+\)`)
unknownPersonalityLineRE = regexp.MustCompile(`(\d+) blocks (.*)`)
raidPersonalityRE = regexp.MustCompile(`^(linear|raid[0-9]+)$`)
)
type mdStatus struct {
name string
active bool
disksActive int64
disksTotal int64
blocksTotal int64
blocksSynced int64
}
type mdadmCollector struct{}
func init() {
registerCollector("mdadm", defaultEnabled, NewMdadmCollector)
}
func evalStatusline(statusline string) (active, total, size int64, err error) {
matches := statuslineRE.FindStringSubmatch(statusline)
// +1 to make it more obvious that the whole string containing the info is also returned as matches[0].
if len(matches) < 3+1 {
return 0, 0, 0, fmt.Errorf("too few matches found in statusline: %s", statusline)
} else if len(matches) > 3+1 {
return 0, 0, 0, fmt.Errorf("too many matches found in statusline: %s", statusline)
}
size, err = strconv.ParseInt(matches[1], 10, 64)
if err != nil {
return 0, 0, 0, fmt.Errorf("%s in statusline: %s", err, statusline)
}
total, err = strconv.ParseInt(matches[2], 10, 64)
if err != nil {
return 0, 0, 0, fmt.Errorf("%s in statusline: %s", err, statusline)
}
active, err = strconv.ParseInt(matches[3], 10, 64)
if err != nil {
return 0, 0, 0, fmt.Errorf("%s in statusline: %s", err, statusline)
}
return active, total, size, nil
}
func evalRaid0line(statusline string) (size int64, err error) {
matches := raid0lineRE.FindStringSubmatch(statusline)
if len(matches) < 2 {
return 0, fmt.Errorf("invalid raid0 status line: %s", statusline)
}
size, err = strconv.ParseInt(matches[1], 10, 64)
if err != nil {
return 0, fmt.Errorf("%s in statusline: %s", err, statusline)
}
return size, nil
}
func evalUnknownPersonalitylineRE(statusline string) (size int64, err error) {
matches := unknownPersonalityLineRE.FindStringSubmatch(statusline)
if len(matches) != 2+1 {
return 0, fmt.Errorf("invalid unknown personality status line: %s", statusline)
}
size, err = strconv.ParseInt(matches[1], 10, 64)
if err != nil {
return 0, fmt.Errorf("%s in statusline: %s", err, statusline)
}
return size, nil
}
// evalBuildline gets the size that has already been synced out of the sync-line.
func evalBuildline(buildline string) (int64, error) {
matches := buildlineRE.FindStringSubmatch(buildline)
// +1 to make it more obvious that the whole string containing the info is also returned as matches[0].
if len(matches) < 1+1 {
return 0, fmt.Errorf("too few matches found in buildline: %s", buildline)
}
if len(matches) > 1+1 {
return 0, fmt.Errorf("too many matches found in buildline: %s", buildline)
}
syncedSize, err := strconv.ParseInt(matches[1], 10, 64)
if err != nil {
return 0, fmt.Errorf("%s in buildline: %s", err, buildline)
}
return syncedSize, nil
}
// parseMdstat parses an mdstat-file and returns a struct with the relevant infos.
func parseMdstat(mdStatusFilePath string) ([]mdStatus, error) {
content, err := ioutil.ReadFile(mdStatusFilePath)
if err != nil {
return []mdStatus{}, err
}
lines := strings.Split(string(content), "\n")
// Each md has at least the deviceline, statusline and one empty line afterwards
// so we will have probably something of the order len(lines)/3 devices
// so we use that for preallocation.
mdStates := make([]mdStatus, 0, len(lines)/3)
for i, line := range lines {
if line == "" {
continue
}
if line[0] == ' ' || line[0] == '\t' {
// Lines starting with white space are not the beginning of a md-section.
continue
}
if strings.HasPrefix(line, "Personalities") || strings.HasPrefix(line, "unused") {
// These lines contain general information.
continue
}
mainLine := strings.Split(line, " ")
if len(mainLine) < 4 {
return mdStates, fmt.Errorf("error parsing mdline: %s", line)
}
md := mdStatus{
name: mainLine[0],
active: mainLine[2] == "active",
}
if len(lines) <= i+3 {
return mdStates, fmt.Errorf("error parsing mdstat: entry for %s has fewer lines than expected", md.name)
}
personality := ""
for _, possiblePersonality := range mainLine[3:] {
if raidPersonalityRE.MatchString(possiblePersonality) {
personality = possiblePersonality
break
}
}
switch {
case personality == "raid0" || personality == "linear":
md.disksActive = int64(len(mainLine) - 4) // Get the number of devices from the main line.
md.disksTotal = md.disksActive // Raid0 active and total is always the same if active.
md.blocksTotal, err = evalRaid0line(lines[i+1])
case raidPersonalityRE.MatchString(personality):
md.disksActive, md.disksTotal, md.blocksTotal, err = evalStatusline(lines[i+1])
default:
log.Debugf("Personality unknown: %s", mainLine)
md.disksTotal = int64(len(mainLine) - 3)
md.blocksTotal, err = evalUnknownPersonalitylineRE(lines[i+1])
}
if err != nil {
return mdStates, fmt.Errorf("error parsing mdstat: %s", err)
}
if !md.active {
md.disksActive = 0
}
syncLine := lines[i+2]
if strings.Contains(syncLine, "bitmap") {
syncLine = lines[i+3]
}
// If device is syncing at the moment, get the number of currently synced bytes,
// otherwise that number equals the size of the device.
if strings.Contains(syncLine, "recovery") ||
strings.Contains(syncLine, "resync") &&
!strings.Contains(syncLine, "\tresync=") {
md.blocksSynced, err = evalBuildline(syncLine)
if err != nil {
return mdStates, fmt.Errorf("error parsing mdstat: %s", err)
}
} else {
md.blocksSynced = md.blocksTotal
}
mdStates = append(mdStates, md)
}
return mdStates, nil
}
// NewMdadmCollector returns a new Collector exposing raid statistics.
func NewMdadmCollector() (Collector, error) {
return &mdadmCollector{}, nil
}
var (
isActiveDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "md", "is_active"),
"Indicator whether the md-device is active or not.",
activeDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "md", "state"),
"Indicates the state of md-device.",
[]string{"device"},
nil,
prometheus.Labels{"state": "active"},
)
inActiveDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "md", "state"),
"Indicates the state of md-device.",
[]string{"device"},
prometheus.Labels{"state": "inactive"},
)
recoveringDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "md", "state"),
"Indicates the state of md-device.",
[]string{"device"},
prometheus.Labels{"state": "recovering"},
)
resyncDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "md", "state"),
"Indicates the state of md-device.",
[]string{"device"},
prometheus.Labels{"state": "resync"},
)
disksActiveDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "md", "disks_active"),
"Number of active disks of device.",
[]string{"device"},
disksDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "md", "disks"),
"Number of active/failed/spare disks of device.",
[]string{"device", "state"},
nil,
)
disksTotalDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "md", "disks"),
prometheus.BuildFQName(namespace, "md", "disks_required"),
"Total number of disks of device.",
[]string{"device"},
nil,
@@ -261,52 +91,96 @@ var (
)
func (c *mdadmCollector) Update(ch chan<- prometheus.Metric) error {
statusfile := procFilePath("mdstat")
mdstate, err := parseMdstat(statusfile)
fs, errFs := procfs.NewFS(*procPath)
if errFs != nil {
return fmt.Errorf("failed to open procfs: %v", errFs)
}
mdStats, err := fs.MDStat()
if err != nil {
if os.IsNotExist(err) {
log.Debugf("Not collecting mdstat, file does not exist: %s", statusfile)
log.Debugf("Not collecting mdstat, file does not exist: %s", *procPath)
return nil
}
return fmt.Errorf("error parsing mdstatus: %s", err)
}
for _, mds := range mdstate {
log.Debugf("collecting metrics for device %s", mds.name)
for _, mdStat := range mdStats {
log.Debugf("collecting metrics for device %s", mdStat.Name)
stateVals := make(map[string]float64)
stateVals[mdStat.ActivityState] = 1
var active float64
if mds.active {
active = 1
}
ch <- prometheus.MustNewConstMetric(
isActiveDesc,
prometheus.GaugeValue,
active,
mds.name,
)
ch <- prometheus.MustNewConstMetric(
disksActiveDesc,
prometheus.GaugeValue,
float64(mds.disksActive),
mds.name,
)
ch <- prometheus.MustNewConstMetric(
disksTotalDesc,
prometheus.GaugeValue,
float64(mds.disksTotal),
mds.name,
float64(mdStat.DisksTotal),
mdStat.Name,
)
ch <- prometheus.MustNewConstMetric(
disksDesc,
prometheus.GaugeValue,
float64(mdStat.DisksActive),
mdStat.Name,
"active",
)
ch <- prometheus.MustNewConstMetric(
disksDesc,
prometheus.GaugeValue,
float64(mdStat.DisksFailed),
mdStat.Name,
"failed",
)
ch <- prometheus.MustNewConstMetric(
disksDesc,
prometheus.GaugeValue,
float64(mdStat.DisksSpare),
mdStat.Name,
"spare",
)
ch <- prometheus.MustNewConstMetric(
activeDesc,
prometheus.GaugeValue,
stateVals["active"],
mdStat.Name,
)
ch <- prometheus.MustNewConstMetric(
inActiveDesc,
prometheus.GaugeValue,
stateVals["inactive"],
mdStat.Name,
)
ch <- prometheus.MustNewConstMetric(
recoveringDesc,
prometheus.GaugeValue,
stateVals["recovering"],
mdStat.Name,
)
ch <- prometheus.MustNewConstMetric(
resyncDesc,
prometheus.GaugeValue,
stateVals["resyncing"],
mdStat.Name,
)
ch <- prometheus.MustNewConstMetric(
blocksTotalDesc,
prometheus.GaugeValue,
float64(mds.blocksTotal),
mds.name,
float64(mdStat.BlocksTotal),
mdStat.Name,
)
ch <- prometheus.MustNewConstMetric(
blocksSyncedDesc,
prometheus.GaugeValue,
float64(mds.blocksSynced),
mds.name,
float64(mdStat.BlocksSynced),
mdStat.Name,
)
}


@@ -1,61 +0,0 @@
// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"testing"
)
func TestMdadm(t *testing.T) {
mdStates, err := parseMdstat("fixtures/proc/mdstat")
if err != nil {
t.Fatalf("parsing of reference-file failed entirely: %s", err)
}
refs := map[string]mdStatus{
// { "<name>", <active?>, <numDisksActive>, <totalNumDisks>, <amountSynced>, <totalSize>}
"md3": {"md3", true, 8, 8, 5853468288, 5853468288},
"md127": {"md127", true, 2, 2, 312319552, 312319552},
"md0": {"md0", true, 2, 2, 248896, 248896},
"md4": {"md4", false, 0, 2, 4883648, 4883648},
"md6": {"md6", true, 1, 2, 195310144, 16775552},
"md8": {"md8", true, 2, 2, 195310144, 16775552},
"md7": {"md7", true, 3, 4, 7813735424, 7813735424},
"md9": {"md9", true, 4, 4, 523968, 523968},
"md10": {"md10", true, 2, 2, 314159265, 314159265},
"md11": {"md11", true, 2, 2, 4190208, 4190208},
"md12": {"md12", true, 2, 2, 3886394368, 3886394368},
"md120": {"md120", true, 2, 2, 2095104, 2095104},
"md126": {"md126", true, 2, 2, 1855870976, 1855870976},
"md219": {"md219", false, 0, 3, 7932, 7932},
"md00": {"md00", true, 1, 1, 4186624, 4186624},
}
for _, md := range mdStates {
if md != refs[md.name] {
t.Errorf("failed parsing md-device %s correctly: want %v, got %v", md.name, refs[md.name], md)
}
}
if len(mdStates) != len(refs) {
t.Errorf("expected number of parsed md-device to be %d, but was %d", len(refs), len(mdStates))
}
}
func TestInvalidMdstat(t *testing.T) {
_, err := parseMdstat("fixtures/proc/mdstat_invalid")
if err == nil {
t.Fatalf("parsing of invalid reference file did not find any errors")
}
}