From 7cc1a49d9e1c5798c0cc0e8c14d82b80cde8e0ba Mon Sep 17 00:00:00 2001 From: Brian Christner Date: Thu, 28 Jul 2016 13:39:10 +0200 Subject: [PATCH 1/3] Added Alert Manager address Added Alert manager address --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ad082b8..a44df61 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,8 @@ The Slack configuration requires to build a custom integration. * Copy the Webhook URL into the `alertmanager/config.yml` URL section * Fill in Slack username and channel -View alerts status `http://<Host IP Address>:9090/alerts` +View Prometheus alerts `http://<Host IP Address>:9090/alerts` +View Alert Manager `http://<Host IP Address>:9093` ## Install Dashboard I created a Dashboard template which is available on [Grafana Docker Dashboard](https://grafana.net/dashboards/179). Simply download the dashboard and select from the Grafana menu -> Dashboards -> Import From 36a7b4a871ba5b72bdd5c260a90a6f207bc71fad Mon Sep 17 00:00:00 2001 From: Brian Christner Date: Thu, 28 Jul 2016 13:43:03 +0200 Subject: [PATCH 2/3] Update README.md --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index a44df61..bbdeb61 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,13 @@ The Slack configuration requires to build a custom integration. View Prometheus alerts `http://<Host IP Address>:9090/alerts` View Alert Manager `http://<Host IP Address>:9093` +### Test Alerts +A quick test for your alerts is to stop a service. Stop the node_exporter container and you should shortly see the alert arrive in Slack. Also check the alerts in both the Alert Manager and Prometheus Alerts just to understand how they flow through the system. + +High load test alert - `docker run --rm -it busybox sh -c "while true; do :; done"` + +Let this run for a few minutes and you will notice the load alert appear. + ## Install Dashboard I created a Dashboard template which is available on [Grafana Docker Dashboard](https://grafana.net/dashboards/179). 
Simply download the dashboard and select from the Grafana menu -> Dashboards -> Import From 9c1897bfc6e94b8618e48f2ff31bce762069311b Mon Sep 17 00:00:00 2001 From: Brian Christner Date: Mon, 22 Aug 2016 16:26:38 +0200 Subject: [PATCH 3/3] Update Grafana_Dashboard.json Updated Dashboard. Split network graphs and fine-tuned some other graphs --- Grafana_Dashboard.json | 264 ++++++++++++++++++++++++++--------------- 1 file changed, 170 insertions(+), 94 deletions(-) diff --git a/Grafana_Dashboard.json b/Grafana_Dashboard.json index 2e4f993..f10f87b 100644 --- a/Grafana_Dashboard.json +++ b/Grafana_Dashboard.json @@ -36,7 +36,7 @@ } ], "id": null, - "title": "Docker Monitoring with Prometheus", + "title": "Grafana_Dashboard.json", "description": "Docker Monitoring Template", "tags": [ "docker" @@ -44,7 +44,7 @@ "style": "dark", "timezone": "browser", "editable": true, - "hideControls": true, + "hideControls": false, "sharedCrosshair": true, "rows": [ { @@ -340,7 +340,7 @@ "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(container_cpu_user_seconds_total{image!=\"\"}) by (name))", + "expr": "sort_desc(sum(rate(container_cpu_user_seconds_total{image!=\"\"}[1m])) by (name))", "interval": "10s", "intervalFactor": 1, "legendFormat": "{{ name }}", @@ -364,7 +364,7 @@ }, "yaxes": [ { - "format": "percent", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -475,97 +475,173 @@ ] }, { - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "editable": true, - "error": false, - "fill": 0, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 8, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 200, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true }, 
- "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (container_name) (rate(container_network_receive_bytes_total{image!=\"\"}[1m] ) )", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "{{ Receive }}", - "metric": "container_network_receive_bytes_total", - "refId": "A", - "step": 10 + "aliasColors": { }, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - { - "expr": "sum by (container_name) (rate(container_network_transmit_bytes_total{image!=\"\"}[1m] ) )", - "intervalFactor": 2, - "legendFormat": "{{ Transmit }}", - "metric": "container_network_transmit_bytes_total", - "refId": "B", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Total Network i/o", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "show": true - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": 8, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] + "lines": true, + "linewidth": 2, + "links": [ ], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": 
[ ], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (name) (rate(container_network_receive_bytes_total{image!=\"\",alias=\"$host\"}[1m] ) ))", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "{{ name }}", + "metric": "container_network_receive_bytes_total", + "refId": "A", + "step": 10 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Container Network Input", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 9, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ ], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (name) (rate(container_network_transmit_bytes_total{image!=\"\", alias=\"$host\"}[1m] ) ))", + "intervalFactor": 2, + "legendFormat": "{{ name }}", + "metric": "container_network_transmit_bytes_total", + "refId": "B", + "step": 10 + } + ], + "timeFrom": null, + "timeShift": null, + 
"title": "Container Network Output", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] } ], "title": "New row" @@ -608,7 +684,7 @@ }, "refresh": "10s", "schemaVersion": 12, - "version": 6, + "version": 1, "links": [], "gnetId": 162 }