Created
October 30, 2019 08:51
-
-
Save pavelnikolov/fd6e2b19a4a30932b5fec267c9049f4d to your computer and use it in GitHub Desktop.
Grafana dashboard for monitoring prometheus remote write.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"annotations": { | |
"list": [ | |
{ | |
"builtIn": 1, | |
"datasource": "-- Grafana --", | |
"enable": true, | |
"hide": true, | |
"iconColor": "rgba(0, 211, 255, 1)", | |
"name": "Annotations & Alerts", | |
"type": "dashboard" | |
} | |
] | |
}, | |
"description": "Updated version of Remote Storage Stats by skant ( https://grafana.com/dashboards/2009 ) valid for Grafana 6.2.2 and InfluxDB 1.7.6 \r\n\r\nRelevant when troubleshooting if your InfluxDB remote database is able to keep up with Prometheus writes", | |
"editable": true, | |
"gnetId": 10303, | |
"graphTooltip": 1, | |
"id": 11, | |
"iteration": 1572415041917, | |
"links": [], | |
"panels": [ | |
{ | |
"collapsed": false, | |
"gridPos": { | |
"h": 1, | |
"w": 24, | |
"x": 0, | |
"y": 0 | |
}, | |
"id": 14, | |
"panels": [], | |
"repeat": null, | |
"title": "Samples sent to remote storage", | |
"type": "row" | |
}, | |
{ | |
"aliasColors": {}, | |
"bars": false, | |
"dashLength": 10, | |
"dashes": false, | |
"datasource": "$datasource", | |
"description": "Per second rate of number of samples successfully sent to remote storage.", | |
"fill": 1, | |
"gridPos": { | |
"h": 8, | |
"w": 12, | |
"x": 0, | |
"y": 1 | |
}, | |
"id": 9, | |
"legend": { | |
"alignAsTable": false, | |
"avg": false, | |
"current": false, | |
"max": false, | |
"min": false, | |
"rightSide": false, | |
"show": true, | |
"total": false, | |
"values": false | |
}, | |
"lines": true, | |
"linewidth": 1, | |
"links": [], | |
"nullPointMode": "null", | |
"options": {}, | |
"paceLength": 10, | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"spaceLength": 10, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [ | |
{ | |
"aggregator": "sum", | |
"alias": "", | |
"counterMax": "", | |
"counterResetValue": "1", | |
"currentTagKey": "", | |
"currentTagValue": "", | |
"downsampleAggregator": "avg", | |
"downsampleFillPolicy": "none", | |
"downsampleInterval": "", | |
"explicitTags": false, | |
"expr": "sum(rate(prometheus_remote_storage_samples_in_total[2m]))", | |
"format": "time_series", | |
"hide": false, | |
"interval": "1m", | |
"intervalFactor": 1, | |
"isCounter": true, | |
"legendFormat": "total", | |
"metric": "prometheus_remote_storage_succeeded_samples_total", | |
"refId": "A", | |
"shouldComputeRate": true, | |
"tags": {} | |
} | |
], | |
"thresholds": [], | |
"timeFrom": null, | |
"timeRegions": [], | |
"timeShift": null, | |
"title": "Samples", | |
"tooltip": { | |
"shared": true, | |
"sort": 0, | |
"value_type": "individual" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"buckets": null, | |
"mode": "time", | |
"name": null, | |
"show": true, | |
"values": [] | |
}, | |
"yaxes": [ | |
{ | |
"decimals": null, | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": "0", | |
"show": true | |
}, | |
{ | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
} | |
], | |
"yaxis": { | |
"align": false, | |
"alignLevel": null | |
} | |
}, | |
{ | |
"aliasColors": {}, | |
"bars": false, | |
"dashLength": 10, | |
"dashes": false, | |
"datasource": "$datasource", | |
"fill": 1, | |
"gridPos": { | |
"h": 8, | |
"w": 12, | |
"x": 12, | |
"y": 1 | |
}, | |
"id": 22, | |
"legend": { | |
"avg": false, | |
"current": false, | |
"max": false, | |
"min": false, | |
"show": true, | |
"total": false, | |
"values": false | |
}, | |
"lines": true, | |
"linewidth": 1, | |
"links": [], | |
"nullPointMode": "null", | |
"options": {}, | |
"paceLength": 10, | |
"percentage": false, | |
"pointradius": 2, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"spaceLength": 10, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [ | |
{ | |
"expr": "sum(rate(prometheus_remote_storage_succeeded_samples_total[2m]))", | |
"format": "time_series", | |
"intervalFactor": 1, | |
"legendFormat": "succeeded", | |
"refId": "A" | |
} | |
], | |
"thresholds": [], | |
"timeFrom": null, | |
"timeRegions": [], | |
"timeShift": null, | |
"title": "Succeded Samples - Total", | |
"tooltip": { | |
"shared": true, | |
"sort": 0, | |
"value_type": "individual" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"buckets": null, | |
"mode": "time", | |
"name": null, | |
"show": true, | |
"values": [] | |
}, | |
"yaxes": [ | |
{ | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": "0", | |
"show": true | |
}, | |
{ | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
} | |
], | |
"yaxis": { | |
"align": false, | |
"alignLevel": null | |
} | |
}, | |
{ | |
"aliasColors": {}, | |
"bars": false, | |
"dashLength": 10, | |
"dashes": false, | |
"datasource": "$datasource", | |
"fill": 1, | |
"gridPos": { | |
"h": 8, | |
"w": 12, | |
"x": 0, | |
"y": 9 | |
}, | |
"id": 18, | |
"legend": { | |
"avg": false, | |
"current": false, | |
"max": false, | |
"min": false, | |
"show": true, | |
"total": false, | |
"values": false | |
}, | |
"lines": true, | |
"linewidth": 1, | |
"links": [], | |
"nullPointMode": "null", | |
"options": {}, | |
"paceLength": 10, | |
"percentage": false, | |
"pointradius": 2, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"spaceLength": 10, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [ | |
{ | |
"expr": "sum(rate(prometheus_remote_storage_failed_samples_total[5m]))", | |
"format": "time_series", | |
"intervalFactor": 1, | |
"legendFormat": "failed", | |
"refId": "A" | |
} | |
], | |
"thresholds": [], | |
"timeFrom": null, | |
"timeRegions": [], | |
"timeShift": null, | |
"title": "Total Attempts to Send Samples that Failed ", | |
"tooltip": { | |
"shared": true, | |
"sort": 0, | |
"value_type": "individual" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"buckets": null, | |
"mode": "time", | |
"name": null, | |
"show": true, | |
"values": [] | |
}, | |
"yaxes": [ | |
{ | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
}, | |
{ | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
} | |
], | |
"yaxis": { | |
"align": false, | |
"alignLevel": null | |
} | |
}, | |
{ | |
"aliasColors": {}, | |
"bars": false, | |
"dashLength": 10, | |
"dashes": false, | |
"datasource": "$datasource", | |
"fill": 1, | |
"gridPos": { | |
"h": 8, | |
"w": 12, | |
"x": 12, | |
"y": 9 | |
}, | |
"id": 20, | |
"legend": { | |
"avg": false, | |
"current": false, | |
"max": false, | |
"min": false, | |
"show": true, | |
"total": false, | |
"values": false | |
}, | |
"lines": true, | |
"linewidth": 1, | |
"links": [], | |
"nullPointMode": "null", | |
"options": {}, | |
"paceLength": 10, | |
"percentage": false, | |
"pointradius": 2, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"spaceLength": 10, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [ | |
{ | |
"expr": "prometheus_remote_storage_pending_samples", | |
"format": "time_series", | |
"intervalFactor": 1, | |
"legendFormat": "pending samples", | |
"refId": "A" | |
} | |
], | |
"thresholds": [], | |
"timeFrom": null, | |
"timeRegions": [], | |
"timeShift": null, | |
"title": "Pending Samples", | |
"tooltip": { | |
"shared": true, | |
"sort": 0, | |
"value_type": "individual" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"buckets": null, | |
"mode": "time", | |
"name": null, | |
"show": true, | |
"values": [] | |
}, | |
"yaxes": [ | |
{ | |
"decimals": null, | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": "0", | |
"show": true | |
}, | |
{ | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
} | |
], | |
"yaxis": { | |
"align": false, | |
"alignLevel": null | |
} | |
}, | |
{ | |
"collapsed": false, | |
"gridPos": { | |
"h": 1, | |
"w": 24, | |
"x": 0, | |
"y": 17 | |
}, | |
"id": 15, | |
"panels": [], | |
"repeat": null, | |
"title": "Queue & Latency", | |
"type": "row" | |
}, | |
{ | |
"aliasColors": {}, | |
"bars": false, | |
"dashLength": 10, | |
"dashes": false, | |
"datasource": "$datasource", | |
"description": "Total attempts to enqueue information in a remote storage", | |
"fill": 1, | |
"gridPos": { | |
"h": 7, | |
"w": 12, | |
"x": 0, | |
"y": 18 | |
}, | |
"id": 10, | |
"legend": { | |
"avg": false, | |
"current": false, | |
"max": false, | |
"min": false, | |
"show": true, | |
"total": false, | |
"values": false | |
}, | |
"lines": true, | |
"linewidth": 1, | |
"links": [], | |
"nullPointMode": "null", | |
"options": {}, | |
"paceLength": 10, | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"spaceLength": 10, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [ | |
{ | |
"aggregator": "sum", | |
"alias": "", | |
"currentTagKey": "", | |
"currentTagValue": "", | |
"downsampleAggregator": "avg", | |
"downsampleFillPolicy": "none", | |
"downsampleInterval": "", | |
"expr": "increase(prometheus_remote_storage_enqueue_retries_total[5m])", | |
"format": "time_series", | |
"hide": false, | |
"intervalFactor": 1, | |
"metric": "prometheus_remote_storage_queue_capacity", | |
"refId": "A", | |
"shouldComputeRate": false, | |
"tags": {} | |
} | |
], | |
"thresholds": [], | |
"timeFrom": null, | |
"timeRegions": [], | |
"timeShift": null, | |
"title": "Total Retries to Enqueue", | |
"tooltip": { | |
"shared": true, | |
"sort": 0, | |
"value_type": "individual" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"buckets": null, | |
"mode": "time", | |
"name": null, | |
"show": true, | |
"values": [] | |
}, | |
"yaxes": [ | |
{ | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
}, | |
{ | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
} | |
], | |
"yaxis": { | |
"align": false, | |
"alignLevel": null | |
} | |
}, | |
{ | |
"aliasColors": {}, | |
"bars": false, | |
"dashLength": 10, | |
"dashes": false, | |
"datasource": "prometheus", | |
"description": "50th and 95th percentiles of remote storage batch latency", | |
"fill": 0, | |
"gridPos": { | |
"h": 7, | |
"w": 12, | |
"x": 12, | |
"y": 18 | |
}, | |
"id": 12, | |
"legend": { | |
"avg": false, | |
"current": false, | |
"max": false, | |
"min": false, | |
"show": true, | |
"total": false, | |
"values": false | |
}, | |
"lines": true, | |
"linewidth": 1, | |
"links": [], | |
"nullPointMode": "null", | |
"options": {}, | |
"paceLength": 10, | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"spaceLength": 10, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [ | |
{ | |
"aggregator": "p95", | |
"alias": "95th Percentile", | |
"counterMax": "", | |
"downsampleAggregator": "avg", | |
"downsampleFillPolicy": "none", | |
"downsampleInterval": "", | |
"expandHelper": 0, | |
"expr": "histogram_quantile(0.50, sum(rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket[5m])) by (le))", | |
"format": "time_series", | |
"intervalFactor": 1, | |
"isCounter": false, | |
"legendFormat": "p50", | |
"metric": "prometheus_remote_storage_sent_batch_duration_seconds_bucket", | |
"refId": "B", | |
"shouldComputeRate": true, | |
"target": "Bosun Query" | |
}, | |
{ | |
"aggregator": "p95", | |
"alias": "95th Percentile", | |
"counterMax": "", | |
"downsampleAggregator": "avg", | |
"downsampleFillPolicy": "none", | |
"downsampleInterval": "", | |
"expandHelper": 0, | |
"expr": "histogram_quantile(0.95, sum(rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket[5m])) by (le))", | |
"format": "time_series", | |
"intervalFactor": 1, | |
"isCounter": false, | |
"legendFormat": "p95", | |
"metric": "prometheus_remote_storage_sent_batch_duration_seconds_bucket", | |
"refId": "A", | |
"shouldComputeRate": true, | |
"target": "Bosun Query" | |
}, | |
{ | |
"aggregator": "p95", | |
"alias": "95th Percentile", | |
"counterMax": "", | |
"downsampleAggregator": "avg", | |
"downsampleFillPolicy": "none", | |
"downsampleInterval": "", | |
"expandHelper": 0, | |
"expr": "histogram_quantile(0.99, sum(rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket[5m])) by (le))", | |
"format": "time_series", | |
"intervalFactor": 1, | |
"isCounter": false, | |
"legendFormat": "p99", | |
"metric": "prometheus_remote_storage_sent_batch_duration_seconds_bucket", | |
"refId": "C", | |
"shouldComputeRate": true, | |
"target": "Bosun Query" | |
} | |
], | |
"thresholds": [], | |
"timeFrom": null, | |
"timeRegions": [], | |
"timeShift": null, | |
"title": "Latency", | |
"tooltip": { | |
"shared": true, | |
"sort": 0, | |
"value_type": "individual" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"buckets": null, | |
"mode": "time", | |
"name": null, | |
"show": true, | |
"values": [] | |
}, | |
"yaxes": [ | |
{ | |
"format": "ms", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
}, | |
{ | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
} | |
], | |
"yaxis": { | |
"align": false, | |
"alignLevel": null | |
} | |
}, | |
{ | |
"collapsed": false, | |
"gridPos": { | |
"h": 1, | |
"w": 24, | |
"x": 0, | |
"y": 25 | |
}, | |
"id": 16, | |
"panels": [], | |
"repeat": null, | |
"title": "Shards & Dropped Samples", | |
"type": "row" | |
}, | |
{ | |
"aliasColors": {}, | |
"bars": false, | |
"dashLength": 10, | |
"dashes": false, | |
"datasource": "$datasource", | |
"description": "Current number of active shards", | |
"editable": true, | |
"error": false, | |
"fill": 1, | |
"grid": {}, | |
"gridPos": { | |
"h": 7, | |
"w": 12, | |
"x": 0, | |
"y": 26 | |
}, | |
"id": 7, | |
"legend": { | |
"avg": false, | |
"current": false, | |
"max": false, | |
"min": false, | |
"show": true, | |
"total": false, | |
"values": false | |
}, | |
"lines": true, | |
"linewidth": 2, | |
"links": [], | |
"nullPointMode": "connected", | |
"options": {}, | |
"paceLength": 10, | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"spaceLength": 10, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [ | |
{ | |
"aggregator": "sum", | |
"alias": "", | |
"currentTagKey": "", | |
"currentTagValue": "", | |
"downsampleAggregator": "avg", | |
"downsampleFillPolicy": "none", | |
"downsampleInterval": "", | |
"explicitTags": false, | |
"expr": "prometheus_remote_storage_shards", | |
"format": "time_series", | |
"hide": false, | |
"intervalFactor": 1, | |
"isCounter": false, | |
"legendFormat": "shards", | |
"metric": "prometheus_remote_storage_shards", | |
"refId": "A", | |
"shouldComputeRate": false, | |
"tags": {} | |
}, | |
{ | |
"expr": "prometheus_remote_storage_shards_desired", | |
"format": "time_series", | |
"intervalFactor": 1, | |
"legendFormat": "desired shards", | |
"refId": "B" | |
} | |
], | |
"thresholds": [], | |
"timeFrom": null, | |
"timeRegions": [], | |
"timeShift": null, | |
"title": "Remote storage shards", | |
"tooltip": { | |
"msResolution": false, | |
"shared": true, | |
"sort": 0, | |
"value_type": "cumulative" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"buckets": null, | |
"mode": "time", | |
"name": null, | |
"show": true, | |
"values": [] | |
}, | |
"yaxes": [ | |
{ | |
"format": "none", | |
"label": "", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
}, | |
{ | |
"format": "short", | |
"label": "", | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
} | |
], | |
"yaxis": { | |
"align": false, | |
"alignLevel": null | |
} | |
}, | |
{ | |
"aliasColors": {}, | |
"bars": false, | |
"dashLength": 10, | |
"dashes": false, | |
"datasource": "$datasource", | |
"description": "number of samples which were dropped/second due to the queue being full.", | |
"editable": true, | |
"error": false, | |
"fill": 1, | |
"grid": {}, | |
"gridPos": { | |
"h": 7, | |
"w": 12, | |
"x": 12, | |
"y": 26 | |
}, | |
"id": 5, | |
"legend": { | |
"avg": false, | |
"current": false, | |
"max": false, | |
"min": false, | |
"show": true, | |
"total": false, | |
"values": false | |
}, | |
"lines": true, | |
"linewidth": 2, | |
"links": [], | |
"nullPointMode": "connected", | |
"options": {}, | |
"paceLength": 10, | |
"percentage": false, | |
"pointradius": 5, | |
"points": false, | |
"renderer": "flot", | |
"seriesOverrides": [], | |
"spaceLength": 10, | |
"stack": false, | |
"steppedLine": false, | |
"targets": [ | |
{ | |
"aggregator": "sum", | |
"downsampleAggregator": "avg", | |
"downsampleFillPolicy": "none", | |
"downsampleInterval": "", | |
"expr": "sum(rate(prometheus_remote_storage_dropped_samples_total[5m]))", | |
"format": "time_series", | |
"hide": false, | |
"intervalFactor": 1, | |
"isCounter": false, | |
"legendFormat": "dropped", | |
"metric": "prometheus_remote_storage_dropped_samples_total", | |
"refId": "A", | |
"shouldComputeRate": false | |
} | |
], | |
"thresholds": [], | |
"timeFrom": null, | |
"timeRegions": [], | |
"timeShift": null, | |
"title": "Remote storage dropped samples", | |
"tooltip": { | |
"msResolution": false, | |
"shared": true, | |
"sort": 0, | |
"value_type": "cumulative" | |
}, | |
"type": "graph", | |
"xaxis": { | |
"buckets": null, | |
"mode": "time", | |
"name": null, | |
"show": true, | |
"values": [] | |
}, | |
"yaxes": [ | |
{ | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
}, | |
{ | |
"format": "short", | |
"label": null, | |
"logBase": 1, | |
"max": null, | |
"min": null, | |
"show": true | |
} | |
], | |
"yaxis": { | |
"align": false, | |
"alignLevel": null | |
} | |
} | |
], | |
"refresh": "30s", | |
"schemaVersion": 17, | |
"style": "dark", | |
"tags": [], | |
"templating": { | |
"list": [ | |
{ | |
"current": { | |
"selected": true, | |
"tags": [], | |
"text": "prometheus", | |
"value": "prometheus" | |
}, | |
"hide": 0, | |
"label": null, | |
"name": "datasource", | |
"options": [], | |
"query": "prometheus", | |
"refresh": 1, | |
"regex": "", | |
"skipUrlSync": false, | |
"type": "datasource" | |
} | |
] | |
}, | |
"time": { | |
"from": "now-1h", | |
"to": "now" | |
}, | |
"timepicker": { | |
"nowDelay": "2m", | |
"refresh_intervals": [ | |
"5s", | |
"10s", | |
"30s", | |
"1m", | |
"5m", | |
"15m", | |
"30m", | |
"1h", | |
"2h", | |
"1d" | |
], | |
"time_options": [ | |
"5m", | |
"15m", | |
"1h", | |
"6h", | |
"12h", | |
"24h", | |
"2d", | |
"7d", | |
"30d" | |
] | |
}, | |
"timezone": "browser", | |
"title": "Prometheus - Remote Storage Stats", | |
"uid": "cs2yKyMWz", | |
"version": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment