Merge pull request #13 from llitfkitfk/version-2

Add AlertManager
This commit is contained in:
Brian Christner 2016-07-28 10:16:19 +02:00 committed by GitHub
commit 00a44359f9
5 changed files with 295 additions and 1 deletions

259
HighLoadDashboard.json Normal file
View File

@ -0,0 +1,259 @@
{
"__inputs": [
{
"name": "DS_PROMETHEUS",
"label": "Prometheus",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__requires": [
{
"type": "panel",
"id": "graph",
"name": "Graph",
"version": ""
},
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "3.1.0"
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
}
],
"id": null,
"title": "High Load",
"tags": [],
"style": "dark",
"timezone": "browser",
"editable": true,
"hideControls": false,
"sharedCrosshair": false,
"rows": [
{
"collapse": false,
"editable": true,
"height": 323.625,
"panels": [
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"fill": 1,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 1,
"isNew": true,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "node_load1",
"hide": false,
"intervalFactor": 2,
"legendFormat": "",
"metric": "node_load1",
"refId": "A",
"step": 10
}
],
"timeFrom": null,
"timeShift": null,
"title": "Panel Title",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "Row"
},
{
"collapse": false,
"editable": true,
"height": 407.4375,
"panels": [
{
"aliasColors": {
"ALERTS{alertname=\"high_load\",alertstate=\"firing\",instance=\"node-exporter:9100\",job=\"prometheus\"}": "#BF1B00"
},
"bars": false,
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"fill": 1,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 3,
"isNew": true,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "ALERTS",
"intervalFactor": 1,
"metric": "ALERTS",
"refId": "A",
"step": 5
}
],
"timeFrom": null,
"timeShift": null,
"title": "Panel Title",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "New row"
}
],
"time": {
"from": "now-3h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"templating": {
"list": []
},
"annotations": {
"list": []
},
"refresh": "10s",
"schemaVersion": 12,
"version": 4,
"links": [],
"gnetId": null
}

10
alertmanager/config.yml Normal file
View File

@ -0,0 +1,10 @@
route:
receiver: 'slack'
receivers:
- name: 'slack'
slack_configs:
- send_resolved: true
username: '<username>'
channel: '#<channel-name>'
api_url: '<incomming-webhook-url>'

View File

@ -20,10 +20,14 @@ services:
command:
- '-config.file=/etc/prometheus/prometheus.yml'
- '-storage.local.path=/prometheus'
- '-alertmanager.url=http://alertmanager:9093'
expose:
- 9090
ports:
- 9090:9090
links:
- cadvisor:cadvisor
- alertmanager:alertmanager
depends_on:
- cadvisor
networks:
@ -35,7 +39,18 @@ services:
- 9100
networks:
- back-tier
alertmanager:
image: prom/alertmanager
ports:
- 9093:9093
volumes:
- ./alertmanager/:/etc/alertmanager/
networks:
- back-tier
command:
- '-config.file=/etc/alertmanager/config.yml'
- '-storage.path=/alertmanager'
cadvisor:
image: google/cadvisor
volumes:

9
prometheus/alert.rules Normal file
View File

@ -0,0 +1,9 @@
ALERT service_down
IF up == 0
ALERT high_load
IF node_load1 > 0.5
ANNOTATIONS {
summary = "Instance {{ $labels.instance }} under high load",
description = "{{ $labels.instance }} of job {{ $labels.job }} is under high load.",
}

View File

@ -11,6 +11,7 @@ global:
# Load and evaluate rules in this file every 'evaluation_interval' seconds.
rule_files:
- "alert.rules"
# - "first.rules"
# - "second.rules"