# HPCplaybooks/roles/prom_server/templates/etc/prometheus.yml

# Global defaults, used by every scrape job and rule evaluation unless a job
# overrides them.
global:
  scrape_interval: 60s      # Scrape targets every 60 seconds.
  evaluation_interval: 60s  # Evaluate rules every 60 seconds.
  # scrape_timeout is set to the global default (10s).

  # Attach these labels to any time series or alerts when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    # Quoted so the rendered hostname is always read as a YAML string, even
    # when it is empty or looks like a number or boolean.
    monitor: '{{ ansible_hostname }}'

# Alertmanager endpoint(s) that alerts are sent to.
alerting:
  alertmanagers:
    # NOTE(review): basic-auth credentials are sent over plain http here;
    # confirm that TLS is terminated in front of this endpoint or switch the
    # scheme to https.
    - scheme: http
      static_configs:
        - targets:
            - "alertmanager.kube.hpc.rug.nl"
      basic_auth:
        username: hpc
        # Quoted so that characters such as '#', ': ' or a leading '*' / '&'
        # in the secret cannot truncate or break the rendered YAML. A literal
        # single quote inside the password must be doubled ('').
        password: '{{ alertmanager_pass }}'

# Rule files to load; the rules they contain are evaluated once per
# 'evaluation_interval'.
rule_files:
  - "/etc/prometheus/alerting.rules"

# Scrape jobs. Each job's name is added as a `job=<job_name>` label to every
# time series scraped through it.
scrape_configs:
  # Prometheus scraping itself.
  - job_name: 'prometheus'
    static_configs:
      - targets:
          - 'localhost:9090'

  # peregrine: targets for the 'node' job are read from targets.json.
  - job_name: 'node'
    scrape_interval: 120s  # per-job value instead of the global 60s
    file_sd_configs:
      - files:
          - targets.json

  # peregrine: targets for the 'ipmi' job are read from ipmi-targets.json.
  - job_name: 'ipmi'
    scrape_interval: 120s  # per-job value instead of the global 60s
    file_sd_configs:
      - files:
          - ipmi-targets.json

  # Scrape the cadvisor container exporter; targets are read from
  # cadvisor.json.
  - job_name: 'cadvisor'
    scrape_interval: 60s
    file_sd_configs:
      - files:
          - cadvisor.json