From 75ec5a0d9400aaff18727e99cdc3153612e8d50f Mon Sep 17 00:00:00 2001 From: Brian Christner Date: Thu, 28 Jul 2016 13:37:16 +0200 Subject: [PATCH] added System_Monitoring Dashboard for Grafana with Alerting metrics --- dashboards/HighLoadDashboard.json | 259 ----------- dashboards/System_Monitoring.json | 749 ++++++++++++++++++++++++++++++ 2 files changed, 749 insertions(+), 259 deletions(-) delete mode 100644 dashboards/HighLoadDashboard.json create mode 100644 dashboards/System_Monitoring.json diff --git a/dashboards/HighLoadDashboard.json b/dashboards/HighLoadDashboard.json deleted file mode 100644 index 66da47c..0000000 --- a/dashboards/HighLoadDashboard.json +++ /dev/null @@ -1,259 +0,0 @@ -{ - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "Prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" - }, - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "3.1.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - } - ], - "id": null, - "title": "High Load", - "tags": [], - "style": "dark", - "timezone": "browser", - "editable": true, - "hideControls": false, - "sharedCrosshair": false, - "rows": [ - { - "collapse": false, - "editable": true, - "height": 323.625, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 1, - "isNew": true, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_load1", - "hide": false, - "intervalFactor": 2, - "legendFormat": "", - "metric": "node_load1", - "refId": "A", - "step": 10 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Panel Title", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "show": true - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "title": "Row" - }, - { - "collapse": false, - "editable": true, - "height": 407.4375, - "panels": [ - { - "aliasColors": { - "ALERTS{alertname=\"high_load\",alertstate=\"firing\",instance=\"node-exporter:9100\",job=\"prometheus\"}": "#BF1B00" - }, - "bars": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 3, - "isNew": true, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "ALERTS", - "intervalFactor": 1, - "metric": "ALERTS", - "refId": "A", - "step": 5 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Panel Title", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "show": true - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "title": "New row" - } - ], - "time": { - "from": "now-3h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "templating": { - "list": [] - }, - "annotations": { - "list": [] - }, - "refresh": "10s", - "schemaVersion": 12, - "version": 4, - "links": [], - "gnetId": null -} \ No newline at end of file diff --git a/dashboards/System_Monitoring.json b/dashboards/System_Monitoring.json new file mode 100644 index 0000000..568a6a3 --- /dev/null +++ b/dashboards/System_Monitoring.json @@ -0,0 +1,749 @@ +{ + "id": 3, + "title": "System Monitoring", + "tags": [ + "alerts" + ], + "style": "dark", + "timezone": "browser", + "editable": true, + "hideControls": true, + "sharedCrosshair": false, + "rows": [ + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "decimals": null, + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 6, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "ALERTS{alertname=\"high_load\",alertstate=\"firing\"}", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "metric": "ALERTS", + "refId": "A", + "step": 120 + } + ], + "thresholds": "0.6,0.9", + "title": "Load Status", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "HIGH", + "value": "1" + }, + { + "op": "=", + "text": "OK", + "value": "0" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "decimals": null, + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 9, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 6, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "ALERTS{alertname=\"service_down\",alertstate=\"firing\"}", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "metric": "ALERTS", + "refId": "A", + "step": 120 + } + ], + "thresholds": "0.6,0.9", + "title": "Service Down", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "Offline", + "value": "1" + }, + { + "op": "=", + "text": "OK", + "value": "0" + } + ], + "valueName": "current" + } + ], + "title": "Row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "editable": true, + "error": false, + "fill": 0, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 2, + "isNew": true, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_MemFree", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "metric": "node_memory_MemFree", + "refId": "A", + "step": 30 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Free RAM", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "editable": true, + "error": false, + "fill": 0, + "grid": { + "threshold1": 500000000, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": 250000000, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 3, + "isNew": true, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_SwapFree", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "refId": "A", + "step": 30 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Free Swap", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 6, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_virtual_memory_bytes", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "metric": "process_virtual_memory_bytes", + "refId": "A", + "step": 20 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Virtual Mem Size", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "decimals": null, + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": 1, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": 2, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 5, + "isNew": true, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 8, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_load1", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{job}}", + "metric": "node_load1", + "refId": "A", + "step": 15 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Load", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 0, + "grid": { + "threshold1": 20, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": 10, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 4, + "isNew": true, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(node_memory_MemFree + node_memory_SwapFree)/(node_memory_MemTotal + node_memory_SwapTotal)*100", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "refId": "A", + "step": 60 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Free Memory (RAM + Swap)", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": 100, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": 229, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "Prometheus", + "decimals": null, + "editable": true, + "error": false, + "fill": 0, + "grid": { + "threshold1": 2500000000, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": 1000000000, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 1, + "isNew": true, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_filesystem_avail{mountpoint=\"/\"}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "metric": "", + "refId": "A", + "step": 60 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Free Storage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "New row" + } + ], + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "templating": { + "list": [] + }, + "annotations": { + "list": [] + }, + "refresh": "10s", + "schemaVersion": 12, + "version": 5, + "links": [], + "gnetId": null +}