From 29dc390edf93c54a7567eabfcb36acfb68e652c6 Mon Sep 17 00:00:00 2001
From: paul
Date: Wed, 27 Jul 2016 16:09:16 +0800
Subject: [PATCH] add alertmanager

---
Review notes (free-form text in this area is not part of the commit
message and is skipped when the patch is applied):

* Purpose: adds an Alertmanager service to the Compose stack, points
  Prometheus at it via -alertmanager.url, adds an `instance_down` alert
  rule and registers alert.rules in prometheus.yml.
* NOTE(review): alertmanager/config.yml commits what looks like a real
  Slack incoming-webhook URL in plain text. Treat it as leaked: rotate
  it and keep the real value out of version control.
* NOTE(review): `instance_down` is labelled severity="page", but the
  only child route matches `severity: slack`. The alert still reaches
  `slack_general`, but only through the root route's default receiver.
  Confirm which label value is intended.
* The `global` SMTP/Hipchat settings are placeholder-looking values and
  no receiver in this config defines email or Hipchat notifications.
* docker-compose.yml: the port mapping `9093:9093` is unquoted; quoting
  port mappings ("9093:9093") avoids YAML 1.1 implicit-typing surprises.
  `alertmanager` is added to prometheus `links` but not to the
  `depends_on` list shown in the hunk context -- confirm start order.
* Both new files end without a trailing newline (see the
  "\ No newline at end of file" markers).
* Typo in a config comment: "send it slack" -> "send it to slack".
* NOTE(review): this patch text was reflowed from a copy whose line
  breaks had been collapsed. Indentation depth and whitespace-only
  lines (the blank `-`/`+` lines in docker-compose.yml and the blank
  context lines in prometheus.yml) are reconstructed -- verify against
  the target tree before running `git am`.

 alertmanager/config.yml   | 33 +++++++++++++++++++++++++++++++++
 docker-compose.yml        | 15 ++++++++++++++-
 prometheus/alert.rules    |  7 +++++++
 prometheus/prometheus.yml |  1 +
 4 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 alertmanager/config.yml
 create mode 100644 prometheus/alert.rules

diff --git a/alertmanager/config.yml b/alertmanager/config.yml
new file mode 100644
index 0000000..8031e7f
--- /dev/null
+++ b/alertmanager/config.yml
@@ -0,0 +1,33 @@
+global:
+  # The smarthost and SMTP sender used for mail notifications.
+  smtp_smarthost: 'localhost:25'
+  smtp_from: 'alertmanager@example.org'
+  smtp_auth_username: 'alertmanager'
+  smtp_auth_password: 'password'
+  # The auth token for Hipchat.
+  hipchat_auth_token: '1234556789'
+  # Alternative host for Hipchat.
+  hipchat_url: 'https://hipchat.foobar.org/'
+
+# The directory from which notification templates are read.
+templates:
+- '/etc/alertmanager/template/*.tmpl'
+
+# The root route on which each incoming alert enters.
+route:
+  group_by: [cluster]
+  # If an alert isn't caught by a route, send it slack.
+  receiver: slack_general
+
+  # The child route trees.
+  routes:
+  # Send severity=slack alerts to slack.
+  - match:
+      severity: slack
+    receiver: slack_general
+receivers:
+- name: slack_general
+  slack_configs:
+  - api_url: 'https://hooks.slack.com/services/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'
+    channel: '#prometheus'
+    send_resolved: true
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index 6a75aa9..00f9c2b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -20,10 +20,12 @@ services:
     command:
       - '-config.file=/etc/prometheus/prometheus.yml'
       - '-storage.local.path=/prometheus'
+      - '-alertmanager.url=http://alertmanager:9093'
     expose:
       - 9090
     links:
       - cadvisor:cadvisor
+      - alertmanager:alertmanager
     depends_on:
       - cadvisor
     networks:
@@ -35,7 +37,18 @@ services:
       - 9100
     networks:
       - back-tier
-
+  alertmanager:
+    image: prom/alertmanager
+    ports:
+      - 9093:9093
+    volumes:
+      - ./alertmanager/:/etc/alertmanager/
+    networks:
+      - back-tier
+    command:
+      - '-config.file=/etc/alertmanager/config.yml'
+      - '-storage.path=/alertmanager'
+
   cadvisor:
     image: google/cadvisor
     volumes:
diff --git a/prometheus/alert.rules b/prometheus/alert.rules
new file mode 100644
index 0000000..1688ee9
--- /dev/null
+++ b/prometheus/alert.rules
@@ -0,0 +1,7 @@
+ALERT instance_down
+IF up == 0
+FOR 5s
+LABELS {severity="page"}
+ANNOTATIONS {
+  DESCRIPTION="{{$labels.instance}} of job {{$labels.job}} has been down for more than 5 seconds.",
+  SUMMARY="Instance {{$labels.instance}} down"}
\ No newline at end of file
diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml
index 4fd78c0..c537348 100644
--- a/prometheus/prometheus.yml
+++ b/prometheus/prometheus.yml
@@ -11,6 +11,7 @@ global:
 
 # Load and evaluate rules in this file every 'evaluation_interval' seconds.
 rule_files:
+  - "alert.rules"
   # - "first.rules"
   # - "second.rules"
 