From e4ec8e04c521a7695192b42acfe029841ca7176c Mon Sep 17 00:00:00 2001
From: paulfantom
Date: Mon, 27 Jul 2020 11:58:36 +0200
Subject: [PATCH] docs/node-mixin: add alerts about failing RAID array

Signed-off-by: paulfantom
---
 docs/node-mixin/alerts/alerts.libsonnet | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/docs/node-mixin/alerts/alerts.libsonnet b/docs/node-mixin/alerts/alerts.libsonnet
index 9ef18d52..1b02d5ab 100644
--- a/docs/node-mixin/alerts/alerts.libsonnet
+++ b/docs/node-mixin/alerts/alerts.libsonnet
@@ -248,6 +248,37 @@
               message: 'Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.',
             },
           },
+          // Fires when an md array has had fewer active disks than it
+          // requires for 15 minutes.
+          {
+            alert: 'NodeRAIDDegraded',
+            expr: |||
+              node_md_disks_required - ignoring (state) (node_md_disks{state="active"}) > 0
+            ||| % $._config,
+            'for': '15m',
+            labels: {
+              severity: 'critical',
+            },
+            annotations: {
+              summary: 'RAID Array is degraded',
+              description: "RAID array '{{ $labels.device }}' on {{ $labels.instance }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically.",
+            },
+          },
+          // Fires as soon as an md array reports at least one disk in the
+          // "failed" state (there is no 'for' hold period on this rule).
+          {
+            alert: 'NodeRAIDDiskFailure',
+            expr: |||
+              node_md_disks{state="failed"} > 0
+            ||| % $._config,
+            labels: {
+              severity: 'warning',
+            },
+            annotations: {
+              summary: 'Failed device in RAID array',
+              description: "At least one device in RAID array on {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap.",
+            },
+          },
         ],
       },
     ],