1
0
Fork 0
HPCplaybooks/roles/prom_server/templates/etc/prometheus.yml

56 lines
1.4 KiB
YAML

# my global config
global:
scrape_interval: 60s # By default, scrape targets every 15 seconds.
evaluation_interval: 60s # By default, scrape targets every 15 seconds.
# scrape_timeout is set to the global default (10s).
# Attach these labels to any time series or alerts when communicating with
# external systems (federation, remote storage, Alertmanager).
external_labels:
monitor: {{ ansible_hostname }}
# alert
alerting:
alertmanagers:
- scheme: http
static_configs:
- targets:
- "alertmanager.kube.hpc.rug.nl"
basic_auth:
username: hpc
password: {{ alertmanager_pass }}
# Load and evaluate rules in this file every 'evaluation_interval' seconds.
rule_files:
- '/etc/prometheus/alerting.rules'
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
# peregrine
- job_name: 'node'
scrape_interval: 120s
file_sd_configs:
- files:
- targets.json
# peregrine
- job_name: 'ipmi'
scrape_interval: 120s
file_sd_configs:
- files:
- ipmi-targets.json
# Scrape the cadvisor container exporter
- job_name: 'cadvisor'
scrape_interval: 60s
file_sd_configs:
- files:
- cadvisor.json