Merge branch 'version-2' of https://github.com/vegasbrianc/prometheus into version-2
This commit is contained in:
commit
83ce3bb4ec
|
@ -36,7 +36,7 @@
|
|||
}
|
||||
],
|
||||
"id": null,
|
||||
"title": "Docker Monitoring with Prometheus",
|
||||
"title": "Grafana_Dashboard.json",
|
||||
"description": "Docker Monitoring Template",
|
||||
"tags": [
|
||||
"docker"
|
||||
|
@ -44,7 +44,7 @@
|
|||
"style": "dark",
|
||||
"timezone": "browser",
|
||||
"editable": true,
|
||||
"hideControls": true,
|
||||
"hideControls": false,
|
||||
"sharedCrosshair": true,
|
||||
"rows": [
|
||||
{
|
||||
|
@ -340,7 +340,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc(sum(container_cpu_user_seconds_total{image!=\"\"}) by (name))",
|
||||
"expr": "sort_desc(sum(rate(container_cpu_user_seconds_total{image!=\"\"}[1m])) by (name))",
|
||||
"interval": "10s",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{ name }}",
|
||||
|
@ -364,7 +364,7 @@
|
|||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "percent",
|
||||
"format": "percentunit",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
|
@ -475,97 +475,173 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"datasource": "${DS_PROMETHEUS}",
|
||||
"decimals": 2,
|
||||
"editable": true,
|
||||
"error": false,
|
||||
"fill": 0,
|
||||
"grid": {
|
||||
"threshold1": null,
|
||||
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||
"threshold2": null,
|
||||
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||
},
|
||||
"id": 8,
|
||||
"isNew": true,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": 200,
|
||||
"sort": "current",
|
||||
"sortDesc": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 2,
|
||||
"links": [],
|
||||
"nullPointMode": "connected",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"span": 12,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (container_name) (rate(container_network_receive_bytes_total{image!=\"\"}[1m] ) )",
|
||||
"interval": "10s",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{ Receive }}",
|
||||
"metric": "container_network_receive_bytes_total",
|
||||
"refId": "A",
|
||||
"step": 10
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"datasource": "${DS_PROMETHEUS}",
|
||||
"decimals": 2,
|
||||
"editable": true,
|
||||
"error": false,
|
||||
"fill": 0,
|
||||
"grid": {
|
||||
"threshold1": null,
|
||||
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||
"threshold2": null,
|
||||
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (container_name) (rate(container_network_transmit_bytes_total{image!=\"\"}[1m] ) )",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{ Transmit }}",
|
||||
"metric": "container_network_transmit_bytes_total",
|
||||
"refId": "B",
|
||||
"step": 4
|
||||
}
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Total Network i/o",
|
||||
"tooltip": {
|
||||
"msResolution": false,
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "cumulative"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"show": true
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
"id": 8,
|
||||
"isNew": true,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": 200,
|
||||
"sort": "current",
|
||||
"sortDesc": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
"lines": true,
|
||||
"linewidth": 2,
|
||||
"links": [ ],
|
||||
"nullPointMode": "connected",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"span": 12,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc(sum by (name) (rate(container_network_receive_bytes_total{image!=\"\",alias=\"$host\"}[1m] ) ))",
|
||||
"interval": "10s",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{ name }}",
|
||||
"metric": "container_network_receive_bytes_total",
|
||||
"refId": "A",
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Container Network Input",
|
||||
"tooltip": {
|
||||
"msResolution": false,
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "cumulative"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"show": true
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"datasource": "${DS_PROMETHEUS}",
|
||||
"decimals": 2,
|
||||
"editable": true,
|
||||
"error": false,
|
||||
"fill": 0,
|
||||
"grid": {
|
||||
"threshold1": null,
|
||||
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||
"threshold2": null,
|
||||
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||
},
|
||||
"id": 9,
|
||||
"isNew": true,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": 200,
|
||||
"sort": "current",
|
||||
"sortDesc": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 2,
|
||||
"links": [ ],
|
||||
"nullPointMode": "connected",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"span": 12,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc(sum by (name) (rate(container_network_transmit_bytes_total{image!=\"\", alias=\"$host\"}[1m] ) ))",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{ name }}",
|
||||
"metric": "container_network_transmit_bytes_total",
|
||||
"refId": "B",
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Container Network Output",
|
||||
"tooltip": {
|
||||
"msResolution": false,
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "cumulative"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"show": true
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "New row"
|
||||
|
@ -608,7 +684,7 @@
|
|||
},
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 12,
|
||||
"version": 6,
|
||||
"version": 1,
|
||||
"links": [],
|
||||
"gnetId": 162
|
||||
}
|
||||
|
|
10
README.md
10
README.md
|
@ -44,7 +44,15 @@ The Slack configuration requires to build a custom integration.
|
|||
* Copy the Webhook URL into the `alertmanager/config.yml` URL section
|
||||
* Fill in Slack username and channel
|
||||
|
||||
View alerts status `http://<Host IP Address>:9090/alerts`
|
||||
View Prometheus alerts `http://<Host IP Address>:9090/alerts`
|
||||
View Alert Manager `http://<Host IP Address>:9093`
|
||||
|
||||
### Test Alerts
|
||||
A quick test for your alerts is to stop a service. Stop the node_exporter container and you should notice shortly the alert arrive in Slack. Also check the alerts in both the Alert Manager and Prometheus Alerts just to understand how they flow through the system.
|
||||
|
||||
High load test alert - `docker run --rm -it busybox sh -c "while true; do :; done"``
|
||||
|
||||
Let this run for a few minutes and you will notice the load alert appear.
|
||||
|
||||
## Install Dashboard
|
||||
I created a Dashboard template which is available on [Grafana Docker Dashboard](https://grafana.net/dashboards/179). Simply download the dashboard and select from the Grafana menu -> Dashboards -> Import
|
||||
|
|
Loading…
Reference in New Issue