add alertmanager

This commit is contained in:
paul 2016-07-27 16:09:16 +08:00
parent 945fb739c9
commit 29dc390edf
4 changed files with 55 additions and 1 deletions

33
alertmanager/config.yml Normal file
View File

@ -0,0 +1,33 @@
# Alertmanager configuration: every alert ends up at the single Slack receiver
# defined at the bottom of this file.
global:
  # The smarthost and SMTP sender used for mail notifications.
  # NOTE(review): no receiver in this file sends mail, and the SMTP password
  # below is committed in plain text - remove it or load it from a secret
  # store before using a real account.
  smtp_smarthost: 'localhost:25'
  smtp_from: 'alertmanager@example.org'
  smtp_auth_username: 'alertmanager'
  smtp_auth_password: 'password'
  # The auth token for Hipchat.
  # NOTE(review): no receiver in this file uses Hipchat; looks like a
  # placeholder value - confirm before relying on it.
  hipchat_auth_token: '1234556789'
  # Alternative host for Hipchat.
  hipchat_url: 'https://hipchat.foobar.org/'

# The directory from which notification templates are read.
templates:
  - '/etc/alertmanager/template/*.tmpl'

# The root route on which each incoming alert enters.
route:
  group_by: [cluster]
  # If an alert isn't caught by a child route, send it to Slack.
  receiver: slack_general
  # The child route trees.
  routes:
    # Send severity=slack alerts to Slack. This is the same receiver as the
    # default above, so today the match only documents intent.
    - match:
        severity: slack
      receiver: slack_general

receivers:
  - name: slack_general
    slack_configs:
      # NOTE(review): this webhook URL is a credential committed to version
      # control; anyone with the URL can post to the channel. Rotate it and
      # keep the real value out of the repository.
      - api_url: 'https://hooks.slack.com/services/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'
        channel: '#prometheus'
        # Also post a message when the alert clears.
        send_resolved: true

View File

@ -20,10 +20,12 @@ services:
command:
- '-config.file=/etc/prometheus/prometheus.yml'
- '-storage.local.path=/prometheus'
- '-alertmanager.url=http://alertmanager:9093'
expose:
- 9090
links:
- cadvisor:cadvisor
- alertmanager:alertmanager
depends_on:
- cadvisor
networks:
@ -35,7 +37,18 @@ services:
- 9100
networks:
- back-tier
# Alertmanager service: receives alerts from Prometheus on port 9093 and reads
# its configuration from the bind-mounted ./alertmanager/ directory.
alertmanager:
  # NOTE(review): the image tag is not pinned, and the single-dash flag style
  # used in `command` is version dependent - confirm against the image
  # version actually deployed.
  image: prom/alertmanager
  ports:
    # Quoted so the HOST:CONTAINER mapping is always read as a string.
    - '9093:9093'
  volumes:
    # Makes ./alertmanager/config.yml visible at the path given to
    # -config.file below.
    - ./alertmanager/:/etc/alertmanager/
  networks:
    - back-tier
  command:
    - '-config.file=/etc/alertmanager/config.yml'
    # NOTE(review): no host volume is mapped to /alertmanager in this
    # service; confirm whether this data must survive container re-creation.
    - '-storage.path=/alertmanager'
cadvisor:
image: google/cadvisor
volumes:

7
prometheus/alert.rules Normal file
View File

@ -0,0 +1,7 @@
# Fires once the expression `up == 0` has held for 5 seconds, and attaches the
# label severity="page" to the resulting alert.
# NOTE(review): the Alertmanager config added in this commit only has a child
# route for severity=slack, so this alert is delivered via the default receiver.
ALERT instance_down
  IF up == 0
  FOR 5s
  LABELS { severity = "page" }
  ANNOTATIONS {
    DESCRIPTION = "{{$labels.instance}} of job {{$labels.job}} has been down for more than 5 seconds.",
    SUMMARY = "Instance {{$labels.instance}} down"
  }

View File

@ -11,6 +11,7 @@ global:
# Load and evaluate rules in this file every 'evaluation_interval' seconds.
rule_files:
- "alert.rules"
# - "first.rules"
# - "second.rules"