mirror of
https://github.com/prometheus/prometheus.git
synced 2024-11-13 17:14:05 -08:00
1dcb7637f5
Add metrics around checkpointing and persistence * Add a metric to say if checkpointing is happening, and another to track total checkpoint time and count. This breaks the existing prometheus_local_storage_checkpoint_duration_seconds by renaming it to prometheus_local_storage_checkpoint_last_duration_seconds as the former name is more appropriate for a summary. * Add metric for last checkpoint size. * Add metric for series/chunks processed by checkpoints. For long checkpoints it'd be useful to see how they're progressing. * Add metric for dirty series * Add metric for number of chunks persisted per series. You can get the number of chunks from chunk_ops, but not the matching number of series. This helps determine the size of the writes being made. * Add metric for chunks queued for persistence Chunks created includes both chunks that'll need persistence and chunks read in for queries. This only includes chunks created for persistence. * Code review comments on new persistence metrics.
126 lines
5.4 KiB
HTML
126 lines
5.4 KiB
HTML
{{ template "head" . }}

{{/* Right-hand summary table. Every query below is filtered to job='prometheus' and the instance passed in .Params.instance. */}}
{{ template "prom_right_table_head" }}
{{/* Overview rows: CPU seconds rate, resident memory, and the "version" label of the first prometheus_build_info result. */}}
<tr>
  <th colspan="2">Overview</th>
</tr>
<tr>
  <td>CPU</td>
  <td>{{ template "prom_query_drilldown" (args (printf "irate(process_cpu_seconds_total{job='prometheus',instance='%s'}[5m])" .Params.instance) "s/s" "humanizeNoSmallPrefix") }}</td>
</tr>
<tr>
  <td>Memory</td>
  <td>{{ template "prom_query_drilldown" (args (printf "process_resident_memory_bytes{job='prometheus',instance='%s'}" .Params.instance) "B" "humanize1024") }}</td>
</tr>
<tr>
  <td>Version</td>
  {{/* Rendered only when the query returns a result; otherwise the cell stays empty. */}}
  <td>{{ with query (printf "prometheus_build_info{job='prometheus',instance='%s'}" .Params.instance) }}{{. | first | label "version"}}{{end}}</td>
</tr>

{{/* Storage rows: one drilldown per prometheus_local_storage_* metric for the selected instance. */}}
<tr>
  <th colspan="2">Storage</th>
</tr>
<tr>
  <td>Ingested Samples</td>
  <td>{{ template "prom_query_drilldown" (args (printf "irate(prometheus_local_storage_ingested_samples_total{job='prometheus',instance='%s'}[5m])" .Params.instance) "/s" "humanizeNoSmallPrefix") }}</td>
</tr>
<tr>
  <td>Time Series</td>
  <td>{{ template "prom_query_drilldown" (args (printf "prometheus_local_storage_memory_series{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }}</td>
</tr>
<tr>
  <td>Indexing Queue</td>
  <td>{{ template "prom_query_drilldown" (args (printf "prometheus_local_storage_indexing_queue_length{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }}</td>
</tr>
<tr>
  <td>Chunks</td>
  <td>{{ template "prom_query_drilldown" (args (printf "prometheus_local_storage_memory_chunks{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }}</td>
</tr>
<tr>
  <td>Chunk Descriptors</td>
  <td>{{ template "prom_query_drilldown" (args (printf "prometheus_local_storage_memory_chunkdescs{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }}</td>
</tr>
<tr>
  <td>Chunks To Persist</td>
  <td>{{ template "prom_query_drilldown" (args (printf "prometheus_local_storage_chunks_to_persist{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }}</td>
</tr>
<tr>
  <td>Checkpoint Duration</td>
  {{/* Reads the *_last_duration_seconds metric and formats it with humanizeDuration. */}}
  <td>{{ template "prom_query_drilldown" (args (printf "prometheus_local_storage_checkpoint_last_duration_seconds{job='prometheus',instance='%s'}" .Params.instance) "" "humanizeDuration") }}</td>
</tr>

{{/* Rules rows. The two duration rows divide the rate of a *_sum series by the rate of the matching *_count series, so .Params.instance is passed to printf twice. */}}
<tr>
  <th colspan="2">Rules</th>
</tr>
<tr>
  <td>Evaluation Duration</td>
  <td>{{ template "prom_query_drilldown" (args (printf "irate(prometheus_evaluator_duration_seconds_sum{job='prometheus',instance='%s'}[5m]) / irate(prometheus_evaluator_duration_seconds_count{job='prometheus',instance='%s'}[5m])" .Params.instance .Params.instance) "" "humanizeDuration") }}</td>
</tr>
<tr>
  <td>Notification Latency</td>
  <td>{{ template "prom_query_drilldown" (args (printf "irate(prometheus_notifications_latency_seconds_sum{job='prometheus',instance='%s'}[5m]) / irate(prometheus_notifications_latency_seconds_count{job='prometheus',instance='%s'}[5m])" .Params.instance .Params.instance) "" "humanizeDuration") }}</td>
</tr>
<tr>
  <td>Notification Queue</td>
  <td>{{ template "prom_query_drilldown" (args (printf "prometheus_notifications_queue_length{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }}</td>
</tr>
{{/* HTTP Server rows: one row per handler matching the regex below, sorted by the "handler" label. */}}
<tr>
  <th colspan="2">HTTP Server</th>
</tr>
{{ range printf "http_request_duration_microseconds_count{job='prometheus',instance='%s',handler=~'^(query.*|federate|consoles)$'}" .Params.instance | query | sortByLabel "handler" }}
{{/* Inside the range the dot is a single query result, so instance and handler are read from .Labels rather than .Params. */}}
<tr>
  <td>{{ .Labels.handler }}</td>
  <td>{{ template "prom_query_drilldown" (args (printf "irate(http_request_duration_microseconds_count{job='prometheus',instance='%s',handler='%s'}[5m])" .Labels.instance .Labels.handler) "/s" "humanizeNoSmallPrefix") }}</td>
</tr>
{{ end }}

{{ template "prom_right_table_tail" }}

{{/* Main content area: page title followed by one graph per section. */}}
{{ template "prom_content_head" . }}
<h1>Prometheus Overview - {{ .Params.instance }}</h1>

{{/* Ingested Samples graph: drawn into #samplesGraph using the same expression as the "Ingested Samples" table row. */}}
<h3>Ingested Samples</h3>
<div id="samplesGraph"></div>
<script>
new PromConsole.Graph({
  node: document.querySelector("#samplesGraph"),
  expr: "irate(prometheus_local_storage_ingested_samples_total{job='prometheus',instance='{{ .Params.instance }}'}[5m])",
  name: 'Ingested Samples',
  yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
  yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
  yTitle: "Samples",
  yUnits: "/s",
});
</script>

{{/* Time Series graph: drawn into #seriesGraph using the same expression as the "Time Series" table row; no yUnits is set. */}}
<h3>Time Series</h3>
<div id="seriesGraph"></div>
<script>
new PromConsole.Graph({
  node: document.querySelector("#seriesGraph"),
  expr: "prometheus_local_storage_memory_series{job='prometheus',instance='{{ .Params.instance }}'}",
  name: 'Time Series',
  yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
  yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
  yTitle: "Time Series",
});
</script>

{{/* HTTP Server graph: request rate for the same handler regex used by the HTTP Server table rows. */}}
{{/* NOTE(review): name '[[handler]]' is presumably a legend template filled from each series' handler label; confirm against PromConsole.Graph. */}}
<h3>HTTP Server</h3>
<div id="serverGraph"></div>
<script>
new PromConsole.Graph({
  node: document.querySelector("#serverGraph"),
  expr: "irate(http_request_duration_microseconds_count{job='prometheus',instance='{{ .Params.instance }}',handler=~'^(query.*|federate|consoles)$'}[5m])",
  name: '[[handler]]',
  yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
  yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
  yTitle: "Requests",
  yUnits: "/s",
});
</script>

{{/* Close the content area and the page. */}}
{{ template "prom_content_tail" . }}

{{ template "tail" }}