Rewrite remote write dashboard using base grafonnet.

Signed-off-by: Callum Styan <callumstyan@gmail.com>
This commit is contained in:
Callum Styan 2019-10-17 15:40:58 -07:00
parent 81fa63006c
commit 818974ff8f

View file

@ -1,5 +1,12 @@
local g = import 'grafana-builder/grafana.libsonnet'; local g = import 'grafana-builder/grafana.libsonnet';
local grafana = import 'grafonnet/grafana.libsonnet';
local dashboard = grafana.dashboard;
local row = grafana.row;
local singlestat = grafana.singlestat;
local prometheus = grafana.prometheus;
local graphPanel = grafana.graphPanel;
local tablePanel = grafana.tablePanel;
local template = grafana.template;
{ {
grafanaDashboards+:: { grafanaDashboards+:: {
'prometheus.json': 'prometheus.json':
@ -92,83 +99,268 @@ local g = import 'grafana-builder/grafana.libsonnet';
), ),
// Remote write specific dashboard. // Remote write specific dashboard.
'prometheus-remote-write.json': 'prometheus-remote-write.json':
g.dashboard('Prometheus Remote Write') local timestampComparison =
.addMultiTemplate('instance', 'prometheus_build_info', 'instance') graphPanel.new(
.addMultiTemplate('cluster', 'kube_pod_container_info{image=~".*prometheus.*"}', 'cluster') 'Highest Timestamp In vs. Highest Timestamp Sent',
.addRow( datasource='$datasource',
g.row('Timestamps') span=6,
.addPanel(
g.panel('Highest Timestamp In vs. Highest Timestamp Sent') +
g.queryPanel('prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~"$cluster", instance=~"$instance"} - ignoring(queue) group_right(instance) prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~"$cluster", instance=~"$instance"}', '{{cluster}}:{{instance}}-{{queue}}') +
{ yaxes: g.yaxes('s') }
) )
.addPanel( .addTarget(prometheus.target(
g.panel('Rate[5m]') + |||
g.queryPanel('rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~"$cluster", instance=~"$instance"}[5m]) - ignoring (queue) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~"$cluster", instance=~"$instance"}[5m])', '{{cluster}}:{{instance}}-{{queue}}') (
prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~"$cluster", instance=~"$instance"}
-
ignoring(queue) group_right(instance) prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~"$cluster", instance=~"$instance"}
)
|||,
legendFormat='{{cluster}}:{{instance}}-{{queue}}',
));
// Span-6 graph of the 5m rate of the highest ingested timestamp minus the 5m
// rate of each queue's highest sent timestamp, one series per queue.
local timestampComparisonRate =
  local rateDelta = prometheus.target(
    |||
      (
      rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~"$cluster", instance=~"$instance"}[5m])
      -
      ignoring (queue) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~"$cluster", instance=~"$instance"}[5m])
      )
    |||,
    legendFormat='{{cluster}}:{{instance}}-{{queue}}',
  );
  graphPanel.new(
    'Rate[5m]',
    span=6,
    datasource='$datasource',
  ).addTarget(rateDelta);
// Span-12 graph of the 5m rate of samples in, minus the per-queue 5m rates of
// succeeded and dropped samples.
local samplesRate =
  local backlogRate = prometheus.target(
    |||
      rate(
      prometheus_remote_storage_samples_in_total{cluster=~"$cluster", instance=~"$instance"}[5m])
      -
      ignoring(queue) group_right(instance) rate(prometheus_remote_storage_succeeded_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m])
      -
      rate(prometheus_remote_storage_dropped_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m])
    |||,
    legendFormat='{{cluster}}:{{instance}}-{{queue}}',
  );
  graphPanel.new(
    'Rate, in vs. succeeded or dropped [5m]',
    span=12,
    datasource='$datasource',
  ).addTarget(backlogRate);
// Span-12 graph with repeat='cluster' that overlays the max, min, desired and
// current shard series, each labelled by queue.
local shardsQueries =
  // Builds one target for `metric`, with its legend prefixed by `series`.
  local shardTarget(metric, series) = prometheus.target(
    metric + '{cluster=~"$cluster", instance=~"$instance"}',
    legendFormat=series + ':{{queue}}',
  );
  // Series whose alias matches max_shards are assigned yaxis 2.
  local maxOnSecondAxis = {
    seriesOverrides: [
      { alias: '/max_shards/', yaxis: 2 },
    ],
  };
  graphPanel.new(
    'Shards: $cluster',
    repeat='cluster',
    span=12,
    min_span=6,
    datasource='$datasource',
  )
  // Target order is kept as max, min, desired, current.
  .addTarget(shardTarget('prometheus_remote_storage_shards_max', 'max_shards'))
  .addTarget(shardTarget('prometheus_remote_storage_shards_min', 'min_shards'))
  .addTarget(shardTarget('prometheus_remote_storage_shards_desired', 'desired_shards'))
  .addTarget(shardTarget('prometheus_remote_storage_shards', 'current_shards'))
  + maxOnSecondAxis;
// Span-6 graph of prometheus_remote_storage_shard_capacity, one series per
// cluster/instance/queue.
local shardsCapacity =
  local capacity = prometheus.target(
    'prometheus_remote_storage_shard_capacity{cluster=~"$cluster", instance=~"$instance"}',
    legendFormat='{{cluster}}:{{instance}}-{{queue}}',
  );
  graphPanel.new(
    'Shard Capacity: $cluster',
    span=6,
    datasource='$datasource',
  ).addTarget(capacity);
// Span-6 graph of prometheus_remote_storage_pending_samples, one series per
// cluster/instance/queue.
local pendingSamples =
  local pending = prometheus.target(
    'prometheus_remote_storage_pending_samples{cluster=~"$cluster", instance=~"$instance"}',
    legendFormat='{{cluster}}:{{instance}}-{{queue}}',
  );
  graphPanel.new(
    'Pending Samples: $cluster',
    span=6,
    datasource='$datasource',
  ).addTarget(pending);
// Span-6 graph of prometheus_tsdb_wal_segment_current per cluster/instance,
// with the left Y axis format set to 'none'.
local walSegment =
  local currentSegment = prometheus.target(
    'prometheus_tsdb_wal_segment_current{cluster=~"$cluster", instance=~"$instance"}',
    legendFormat='{{cluster}}:{{instance}}',
  );
  graphPanel.new(
    'TSDB Current Segment',
    formatY1='none',
    span=6,
    datasource='$datasource',
  ).addTarget(currentSegment);
// Span-6 graph of prometheus_wal_watcher_current_segment per
// cluster/instance/queue, with the left Y axis format set to 'none'.
local queueSegment =
  local watcherSegment = prometheus.target(
    'prometheus_wal_watcher_current_segment{cluster=~"$cluster", instance=~"$instance"}',
    legendFormat='{{cluster}}:{{instance}}-{{queue}}',
  );
  graphPanel.new(
    'Remote Write Current Segment',
    formatY1='none',
    span=6,
    datasource='$datasource',
  ).addTarget(watcherSegment);
// Span-3 graph of the 5m rate of prometheus_remote_storage_dropped_samples_total.
local droppedSamples =
  local droppedRate = prometheus.target(
    'rate(prometheus_remote_storage_dropped_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m])',
    legendFormat='{{cluster}}:{{instance}}-{{queue}}',
  );
  graphPanel.new(
    'Dropped Samples',
    span=3,
    datasource='$datasource',
  ).addTarget(droppedRate);
// Span-3 graph of the 5m rate of prometheus_remote_storage_failed_samples_total.
local failedSamples =
  local failedRate = prometheus.target(
    'rate(prometheus_remote_storage_failed_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m])',
    legendFormat='{{cluster}}:{{instance}}-{{queue}}',
  );
  graphPanel.new(
    'Failed Samples',
    span=3,
    datasource='$datasource',
  ).addTarget(failedRate);
// Span-3 graph of the 5m rate of prometheus_remote_storage_retried_samples_total.
local retriedSamples =
  local retriedRate = prometheus.target(
    'rate(prometheus_remote_storage_retried_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m])',
    legendFormat='{{cluster}}:{{instance}}-{{queue}}',
  );
  graphPanel.new(
    'Retried Samples',
    span=3,
    datasource='$datasource',
  ).addTarget(retriedRate);
// Span-3 graph of the 5m rate of prometheus_remote_storage_enqueue_retries_total.
local enqueueRetries =
  local enqueueRetryRate = prometheus.target(
    'rate(prometheus_remote_storage_enqueue_retries_total{cluster=~"$cluster", instance=~"$instance"}[5m])',
    legendFormat='{{cluster}}:{{instance}}-{{queue}}',
  );
  graphPanel.new(
    'Enqueue Retries',
    span=3,
    datasource='$datasource',
  ).addTarget(enqueueRetryRate);
// Head of the 'Prometheus Remote Write' dashboard: creates the editable
// dashboard and registers the datasource, instance and cluster template
// variables. Further templates and rows are chained onto this expression.
dashboard.new(
  'Prometheus Remote Write',
  editable=true,
)
// Datasource variable, written as a raw template object; the panels and the
// query templates below refer to it as '$datasource'.
.addTemplate(
  {
    hide: 0,
    label: null,
    name: 'datasource',
    options: [],
    query: 'prometheus',
    refresh: 1,
    regex: '',
    type: 'datasource',
  },
)
// The query strings below contain no format specifiers, so they are passed as
// plain literals instead of being run through `% $._config`.
.addTemplate(
  template.new(
    'instance',
    '$datasource',
    'label_values(prometheus_build_info, instance)',
    refresh='time',
    current={
      selected: true,
      text: 'All',
      value: '$__all',
    },
    includeAll=true,
  )
)
.addTemplate(
  template.new(
    'cluster',
    '$datasource',
    'label_values(kube_pod_container_info{image=~".*prometheus.*"}, cluster)',
    refresh='time',
    current={
      selected: true,
      text: 'All',
      value: '$__all',
    },
    includeAll=true,
  )
)
.addTemplate(
template.new(
'queue',
'$datasource',
'label_values(prometheus_remote_storage_shards, queue)' % $._config,
refresh='time',
current={
selected: true,
text: 'All',
value: '$__all',
},
includeAll=true,
) )
) )
.addRow( .addRow(
g.row('Samples') row.new('Timestamps')
.addPanel( .addPanel(timestampComparison)
g.panel('Rate, in vs. succeeded or dropped [5m]') + .addPanel(timestampComparisonRate)
g.queryPanel('rate(prometheus_remote_storage_samples_in_total{cluster=~"$cluster", instance=~"$instance"}[5m])- ignoring(queue) group_right(instance) rate(prometheus_remote_storage_succeeded_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m]) - rate(prometheus_remote_storage_dropped_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m])', '{{cluster}}:{{instance}}-{{queue}}')
)
) )
.addRow( .addRow(
g.row('Shards Overview') row.new('Samples')
.addPanel( .addPanel(samplesRate)
g.panel('Num. Shards') +
g.queryPanel('prometheus_remote_storage_shards{cluster=~"$cluster", instance=~"$instance"}', '{{cluster}}:{{instance}}-{{queue}}')
)
.addPanel(
g.panel('Desired Shards') +
g.queryPanel('prometheus_remote_storage_shards_desired{cluster=~"$cluster", instance=~"$instance"}', '{{cluster}}:{{instance}}-{{queue}}')
)
) )
.addRow( .addRow(
g.row('Shards Details') row.new('Shards'
.addPanel(
g.panel('Capacity') +
g.queryPanel('prometheus_remote_storage_shard_capacity{cluster=~"$cluster", instance=~"$instance"}', '{{cluster}}:{{instance}}-{{queue}}')
)
.addPanel(
g.panel('Min Shards') +
g.queryPanel('prometheus_remote_storage_shards_min{cluster=~"$cluster", instance=~"$instance"}', '{{cluster}}:{{instance}}-{{queue}}')
)
.addPanel(
g.panel('Max Shards') +
g.queryPanel('prometheus_remote_storage_shards_max{cluster=~"$cluster", instance=~"$instance"}', '{{cluster}}:{{instance}}-{{queue}}')
) )
.addPanel(shardsQueries),
) )
.addRow( .addRow(
g.row('WAL Segments') row.new('Shard Details')
.addPanel( .addPanel(shardsCapacity)
g.panel('TSDB Current Segment') + .addPanel(pendingSamples)
g.queryPanel('prometheus_tsdb_wal_segment_current{cluster=~"$cluster", instance=~"$instance"}', '{{cluster}}:{{instance}}')
)
.addPanel(
g.panel('Remote Write Current Segment') +
g.queryPanel('prometheus_wal_watcher_current_segment{cluster=~"$cluster", instance=~"$instance"}', '{{cluster}}:{{instance}}-{{queue}}')
)
) )
.addRow( .addRow(
g.row('Misc Rates.') row.new('Segments')
.addPanel( .addPanel(walSegment)
g.panel('Dropped Samples') + .addPanel(queueSegment)
g.queryPanel('rate(prometheus_remote_storage_dropped_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m])', '{{cluster}}:{{instance}}-{{queue}}') )
) .addRow(
.addPanel( row.new('Misc. Rates')
g.panel('Failed Samples') + .addPanel(droppedSamples)
g.queryPanel('rate(prometheus_remote_storage_failed_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m])', '{{cluster}}:{{instance}}-{{queue}}') .addPanel(failedSamples)
) .addPanel(retriedSamples)
.addPanel( .addPanel(enqueueRetries)
g.panel('Retried Samples') + )
g.queryPanel('rate(prometheus_remote_storage_retried_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m])', '{{cluster}}:{{instance}}-{{queue}}')
)
.addPanel(
g.panel('Enqueue Retries') +
g.queryPanel('rate(prometheus_remote_storage_enqueue_retries_total{cluster=~"$cluster", instance=~"$instance"}[5m])', '{{cluster}}:{{instance}}-{{queue}}')
)
),
}, },
} }