add alertmanager
This commit is contained in:
parent
945fb739c9
commit
29dc390edf
33
alertmanager/config.yml
Normal file
33
alertmanager/config.yml
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
global:
|
||||||
|
# The smarthost and SMTP sender used for mail notifications.
|
||||||
|
smtp_smarthost: 'localhost:25'
|
||||||
|
smtp_from: 'alertmanager@example.org'
|
||||||
|
smtp_auth_username: 'alertmanager'
|
||||||
|
smtp_auth_password: 'password'
|
||||||
|
# The auth token for Hipchat.
|
||||||
|
hipchat_auth_token: '1234556789'
|
||||||
|
# Alternative host for Hipchat.
|
||||||
|
hipchat_url: 'https://hipchat.foobar.org/'
|
||||||
|
|
||||||
|
# The directory from which notification templates are read.
|
||||||
|
templates:
|
||||||
|
- '/etc/alertmanager/template/*.tmpl'
|
||||||
|
|
||||||
|
# The root route on which each incoming alert enters.
|
||||||
|
route:
|
||||||
|
group_by: [cluster]
|
||||||
|
# If an alert isn't matched by any child route, send it to Slack.
|
||||||
|
receiver: slack_general
|
||||||
|
|
||||||
|
# The child route trees.
|
||||||
|
routes:
|
||||||
|
# Send severity=slack alerts to slack.
|
||||||
|
- match:
|
||||||
|
severity: slack
|
||||||
|
receiver: slack_general
|
||||||
|
receivers:
|
||||||
|
- name: slack_general
|
||||||
|
slack_configs:
|
||||||
|
- api_url: 'https://hooks.slack.com/services/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'
|
||||||
|
channel: '#prometheus'
|
||||||
|
send_resolved: true
|
@ -20,10 +20,12 @@ services:
|
|||||||
command:
|
command:
|
||||||
- '-config.file=/etc/prometheus/prometheus.yml'
|
- '-config.file=/etc/prometheus/prometheus.yml'
|
||||||
- '-storage.local.path=/prometheus'
|
- '-storage.local.path=/prometheus'
|
||||||
|
- '-alertmanager.url=http://alertmanager:9093'
|
||||||
expose:
|
expose:
|
||||||
- 9090
|
- 9090
|
||||||
links:
|
links:
|
||||||
- cadvisor:cadvisor
|
- cadvisor:cadvisor
|
||||||
|
- alertmanager:alertmanager
|
||||||
depends_on:
|
depends_on:
|
||||||
- cadvisor
|
- cadvisor
|
||||||
networks:
|
networks:
|
||||||
@ -35,7 +37,18 @@ services:
|
|||||||
- 9100
|
- 9100
|
||||||
networks:
|
networks:
|
||||||
- back-tier
|
- back-tier
|
||||||
|
alertmanager:
|
||||||
|
image: prom/alertmanager
|
||||||
|
ports:
|
||||||
|
- 9093:9093
|
||||||
|
volumes:
|
||||||
|
- ./alertmanager/:/etc/alertmanager/
|
||||||
|
networks:
|
||||||
|
- back-tier
|
||||||
|
command:
|
||||||
|
- '-config.file=/etc/alertmanager/config.yml'
|
||||||
|
- '-storage.path=/alertmanager'
|
||||||
|
|
||||||
cadvisor:
|
cadvisor:
|
||||||
image: google/cadvisor
|
image: google/cadvisor
|
||||||
volumes:
|
volumes:
|
||||||
|
7
prometheus/alert.rules
Normal file
7
prometheus/alert.rules
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
ALERT instance_down
|
||||||
|
IF up == 0
|
||||||
|
FOR 5s
|
||||||
|
LABELS {severity="page"}
|
||||||
|
ANNOTATIONS {
|
||||||
|
DESCRIPTION="{{$labels.instance}} of job {{$labels.job}} has been down for more than 5 seconds.",
|
||||||
|
SUMMARY="Instance {{$labels.instance}} down"}
|
@ -11,6 +11,7 @@ global:
|
|||||||
|
|
||||||
# Load and evaluate rules in this file every 'evaluation_interval' seconds.
|
# Load and evaluate rules in this file every 'evaluation_interval' seconds.
|
||||||
rule_files:
|
rule_files:
|
||||||
|
- "alert.rules"
|
||||||
# - "first.rules"
|
# - "first.rules"
|
||||||
# - "second.rules"
|
# - "second.rules"
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user