diff --git a/docs/node-mixin/alerts/alerts.libsonnet b/docs/node-mixin/alerts/alerts.libsonnet index 7b9fb890..4423f892 100644 --- a/docs/node-mixin/alerts/alerts.libsonnet +++ b/docs/node-mixin/alerts/alerts.libsonnet @@ -37,7 +37,7 @@ ||| % $._config, 'for': '1h', labels: { - severity: 'critical', + severity: '%(nodeCriticalSeverity)s' % $._config, }, annotations: { summary: 'Filesystem is predicted to run out of space within the next 4 hours.', @@ -73,7 +73,7 @@ ||| % $._config, 'for': '1h', labels: { - severity: 'critical', + severity: '%(nodeCriticalSeverity)s' % $._config, }, annotations: { summary: 'Filesystem has less than 3% space left.', @@ -113,7 +113,7 @@ ||| % $._config, 'for': '1h', labels: { - severity: 'critical', + severity: '%(nodeCriticalSeverity)s' % $._config, }, annotations: { summary: 'Filesystem is predicted to run out of inodes within the next 4 hours.', @@ -149,7 +149,7 @@ ||| % $._config, 'for': '1h', labels: { - severity: 'critical', + severity: '%(nodeCriticalSeverity)s' % $._config, }, annotations: { summary: 'Filesystem has less than 3% inodes left.', diff --git a/docs/node-mixin/config.libsonnet b/docs/node-mixin/config.libsonnet index 95070ca9..8cf9860f 100644 --- a/docs/node-mixin/config.libsonnet +++ b/docs/node-mixin/config.libsonnet @@ -17,6 +17,19 @@ // them here, e.g. 'device!="tmpfs"'. diskDeviceSelector: '', + // Some of the alerts are meant to fire if a critical failure of a + // node is imminent (e.g. the disk is about to run full). In a + // true “cloud native” setup, failures of a single node should be + // tolerated. Hence, even imminent failure of a single node is no + // reason to create a paging alert. However, in practice there are + // still many situations where operators like to get paged in time + // before a node runs out of disk space. nodeCriticalSeverity can + // be set to the desired severity for this kind of alert. This + // can even be templated to depend on labels of the node, e.g. 
you + // could make this critical for traditional database masters but + // just a warning for K8s nodes. + nodeCriticalSeverity: 'critical', + grafana_prefix: '', }, }