add alertmanager
This commit is contained in:
parent
945fb739c9
commit
29dc390edf
33
alertmanager/config.yml
Normal file
33
alertmanager/config.yml
Normal file
@ -0,0 +1,33 @@
|
||||
global:
|
||||
# The smarthost and SMTP sender used for mail notifications.
|
||||
smtp_smarthost: 'localhost:25'
|
||||
smtp_from: 'alertmanager@example.org'
|
||||
smtp_auth_username: 'alertmanager'
|
||||
smtp_auth_password: 'password'
|
||||
# The auth token for HipChat.
|
||||
hipchat_auth_token: '1234556789'
|
||||
# Alternative host for HipChat.
|
||||
hipchat_url: 'https://hipchat.foobar.org/'
|
||||
|
||||
# The directory from which notification templates are read.
|
||||
templates:
|
||||
- '/etc/alertmanager/template/*.tmpl'
|
||||
|
||||
# The root route on which each incoming alert enters.
|
||||
route:
|
||||
group_by: [cluster]
|
||||
# If an alert isn't caught by a child route, send it to Slack.
|
||||
receiver: slack_general
|
||||
|
||||
# The child route trees.
|
||||
routes:
|
||||
# Send severity=slack alerts to slack.
|
||||
- match:
|
||||
severity: slack
|
||||
receiver: slack_general
|
||||
receivers:
|
||||
- name: slack_general
|
||||
slack_configs:
|
||||
- api_url: 'https://hooks.slack.com/services/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'
|
||||
channel: '#prometheus'
|
||||
send_resolved: true
|
@ -20,10 +20,12 @@ services:
|
||||
command:
|
||||
- '-config.file=/etc/prometheus/prometheus.yml'
|
||||
- '-storage.local.path=/prometheus'
|
||||
- '-alertmanager.url=http://alertmanager:9093'
|
||||
expose:
|
||||
- 9090
|
||||
links:
|
||||
- cadvisor:cadvisor
|
||||
- alertmanager:alertmanager
|
||||
depends_on:
|
||||
- cadvisor
|
||||
networks:
|
||||
@ -35,7 +37,18 @@ services:
|
||||
- 9100
|
||||
networks:
|
||||
- back-tier
|
||||
|
||||
alertmanager:
|
||||
image: prom/alertmanager
|
||||
ports:
|
||||
- 9093:9093
|
||||
volumes:
|
||||
- ./alertmanager/:/etc/alertmanager/
|
||||
networks:
|
||||
- back-tier
|
||||
command:
|
||||
- '-config.file=/etc/alertmanager/config.yml'
|
||||
- '-storage.path=/alertmanager'
|
||||
|
||||
cadvisor:
|
||||
image: google/cadvisor
|
||||
volumes:
|
||||
|
7
prometheus/alert.rules
Normal file
7
prometheus/alert.rules
Normal file
@ -0,0 +1,7 @@
|
||||
ALERT instance_down
|
||||
IF up == 0
|
||||
FOR 5s
|
||||
LABELS {severity="page"}
|
||||
ANNOTATIONS {
|
||||
DESCRIPTION="{{$labels.instance}} of job {{$labels.job}} has been down for more than 5 seconds.",
|
||||
SUMMARY="Instance {{$labels.instance}} down"}
|
@ -11,6 +11,7 @@ global:
|
||||
|
||||
# Load the rule files listed below and evaluate them once every 'evaluation_interval'.
|
||||
rule_files:
|
||||
- "alert.rules"
|
||||
# - "first.rules"
|
||||
# - "second.rules"
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user