diff --git a/Grafana_Dashboard.json b/Grafana_Dashboard.json index 2e4f993..f10f87b 100644 --- a/Grafana_Dashboard.json +++ b/Grafana_Dashboard.json @@ -36,7 +36,7 @@ } ], "id": null, - "title": "Docker Monitoring with Prometheus", + "title": "Grafana_Dashboard.json", "description": "Docker Monitoring Template", "tags": [ "docker" @@ -44,7 +44,7 @@ "style": "dark", "timezone": "browser", "editable": true, - "hideControls": true, + "hideControls": false, "sharedCrosshair": true, "rows": [ { @@ -340,7 +340,7 @@ "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(container_cpu_user_seconds_total{image!=\"\"}) by (name))", + "expr": "sort_desc(sum(rate(container_cpu_user_seconds_total{image!=\"\"}[1m])) by (name))", "interval": "10s", "intervalFactor": 1, "legendFormat": "{{ name }}", @@ -364,7 +364,7 @@ }, "yaxes": [ { - "format": "percent", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -475,97 +475,173 @@ ] }, { - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "editable": true, - "error": false, - "fill": 0, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 8, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 200, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (container_name) (rate(container_network_receive_bytes_total{image!=\"\"}[1m] ) )", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "{{ Receive }}", - "metric": 
"container_network_receive_bytes_total", - "refId": "A", - "step": 10 + "aliasColors": { }, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - { - "expr": "sum by (container_name) (rate(container_network_transmit_bytes_total{image!=\"\"}[1m] ) )", - "intervalFactor": 2, - "legendFormat": "{{ Transmit }}", - "metric": "container_network_transmit_bytes_total", - "refId": "B", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Total Network i/o", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "show": true - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": 8, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] + "lines": true, + "linewidth": 2, + "links": [ ], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (name) (rate(container_network_receive_bytes_total{image!=\"\",alias=\"$host\"}[1m] ) ))", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "{{ name }}", + "metric": "container_network_receive_bytes_total", + "refId": "A", + "step": 10 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Container Network Input", + "tooltip": { 
+ "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 9, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ ], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (name) (rate(container_network_transmit_bytes_total{image!=\"\", alias=\"$host\"}[1m] ) ))", + "intervalFactor": 2, + "legendFormat": "{{ name }}", + "metric": "container_network_transmit_bytes_total", + "refId": "B", + "step": 10 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Container Network Output", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] } ], "title": "New row" @@ -608,7 
+684,7 @@ }, "refresh": "10s", "schemaVersion": 12, - "version": 6, + "version": 1, "links": [], "gnetId": 162 } diff --git a/README.md b/README.md index ad082b8..bbdeb61 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,15 @@ The Slack configuration requires to build a custom integration. * Copy the Webhook URL into the `alertmanager/config.yml` URL section * Fill in Slack username and channel -View alerts status `http://:9090/alerts` +View Prometheus alerts `http://<host-ip>:9090/alerts` +View Alert Manager `http://<host-ip>:9093` + +### Test Alerts +A quick test for your alerts is to stop a service. Stop the node_exporter container and you should shortly see the alert arrive in Slack. Also check the alerts in both the Alert Manager and Prometheus Alerts just to understand how they flow through the system. + +High load test alert - `docker run --rm -it busybox sh -c "while true; do :; done"` + +Let this run for a few minutes and you will notice the load alert appear. ## Install Dashboard I created a Dashboard template which is available on [Grafana Docker Dashboard](https://grafana.net/dashboards/179). Simply download the dashboard and select from the Grafana menu -> Dashboards -> Import