commit
00a44359f9
259
HighLoadDashboard.json
Normal file
259
HighLoadDashboard.json
Normal file
@ -0,0 +1,259 @@
|
||||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_PROMETHEUS",
|
||||
"label": "Prometheus",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "prometheus",
|
||||
"pluginName": "Prometheus"
|
||||
}
|
||||
],
|
||||
"__requires": [
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "graph",
|
||||
"name": "Graph",
|
||||
"version": ""
|
||||
},
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "3.1.0"
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "prometheus",
|
||||
"name": "Prometheus",
|
||||
"version": "1.0.0"
|
||||
}
|
||||
],
|
||||
"id": null,
|
||||
"title": "High Load",
|
||||
"tags": [],
|
||||
"style": "dark",
|
||||
"timezone": "browser",
|
||||
"editable": true,
|
||||
"hideControls": false,
|
||||
"sharedCrosshair": false,
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
"editable": true,
|
||||
"height": 323.625,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"datasource": "${DS_PROMETHEUS}",
|
||||
"editable": true,
|
||||
"error": false,
|
||||
"fill": 1,
|
||||
"grid": {
|
||||
"threshold1": null,
|
||||
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||
"threshold2": null,
|
||||
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||
},
|
||||
"id": 1,
|
||||
"isNew": true,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 2,
|
||||
"links": [],
|
||||
"nullPointMode": "connected",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"span": 12,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "node_load1",
|
||||
"hide": false,
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"metric": "node_load1",
|
||||
"refId": "A",
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Panel Title",
|
||||
"tooltip": {
|
||||
"msResolution": false,
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "cumulative"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"show": true
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Row"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"editable": true,
|
||||
"height": 407.4375,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
"ALERTS{alertname=\"high_load\",alertstate=\"firing\",instance=\"node-exporter:9100\",job=\"prometheus\"}": "#BF1B00"
|
||||
},
|
||||
"bars": false,
|
||||
"datasource": "${DS_PROMETHEUS}",
|
||||
"editable": true,
|
||||
"error": false,
|
||||
"fill": 1,
|
||||
"grid": {
|
||||
"threshold1": null,
|
||||
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||
"threshold2": null,
|
||||
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||
},
|
||||
"id": 3,
|
||||
"isNew": true,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 2,
|
||||
"links": [],
|
||||
"nullPointMode": "connected",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"span": 12,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "ALERTS",
|
||||
"intervalFactor": 1,
|
||||
"metric": "ALERTS",
|
||||
"refId": "A",
|
||||
"step": 5
|
||||
}
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Panel Title",
|
||||
"tooltip": {
|
||||
"msResolution": false,
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "cumulative"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"show": true
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "New row"
|
||||
}
|
||||
],
|
||||
"time": {
|
||||
"from": "now-3h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"annotations": {
|
||||
"list": []
|
||||
},
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 12,
|
||||
"version": 4,
|
||||
"links": [],
|
||||
"gnetId": null
|
||||
}
|
10
alertmanager/config.yml
Normal file
10
alertmanager/config.yml
Normal file
@ -0,0 +1,10 @@
|
||||
route:
|
||||
receiver: 'slack'
|
||||
|
||||
receivers:
|
||||
- name: 'slack'
|
||||
slack_configs:
|
||||
- send_resolved: true
|
||||
username: '<username>'
|
||||
channel: '#<channel-name>'
|
||||
api_url: '<incomming-webhook-url>'
|
@ -20,10 +20,14 @@ services:
|
||||
command:
|
||||
- '-config.file=/etc/prometheus/prometheus.yml'
|
||||
- '-storage.local.path=/prometheus'
|
||||
- '-alertmanager.url=http://alertmanager:9093'
|
||||
expose:
|
||||
- 9090
|
||||
ports:
|
||||
- 9090:9090
|
||||
links:
|
||||
- cadvisor:cadvisor
|
||||
- alertmanager:alertmanager
|
||||
depends_on:
|
||||
- cadvisor
|
||||
networks:
|
||||
@ -35,7 +39,18 @@ services:
|
||||
- 9100
|
||||
networks:
|
||||
- back-tier
|
||||
|
||||
alertmanager:
|
||||
image: prom/alertmanager
|
||||
ports:
|
||||
- 9093:9093
|
||||
volumes:
|
||||
- ./alertmanager/:/etc/alertmanager/
|
||||
networks:
|
||||
- back-tier
|
||||
command:
|
||||
- '-config.file=/etc/alertmanager/config.yml'
|
||||
- '-storage.path=/alertmanager'
|
||||
|
||||
cadvisor:
|
||||
image: google/cadvisor
|
||||
volumes:
|
||||
|
9
prometheus/alert.rules
Normal file
9
prometheus/alert.rules
Normal file
@ -0,0 +1,9 @@
|
||||
ALERT service_down
|
||||
IF up == 0
|
||||
|
||||
ALERT high_load
|
||||
IF node_load1 > 0.5
|
||||
ANNOTATIONS {
|
||||
summary = "Instance {{ $labels.instance }} under high load",
|
||||
description = "{{ $labels.instance }} of job {{ $labels.job }} is under high load.",
|
||||
}
|
@ -11,6 +11,7 @@ global:
|
||||
|
||||
# Load and evaluate rules in this file every 'evaluation_interval' seconds.
|
||||
rule_files:
|
||||
- "alert.rules"
|
||||
# - "first.rules"
|
||||
# - "second.rules"
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user