Files
ouranos/ansible/pplg/prometheus.yml.j2
Robert Helewka 3c2f8c57ca feat(observability): add SearXNG, Argos, and Pallas monitoring
- Add SearXNG syslog ingestion and blackbox health probes on miranda
  and rosalind for per-host attributable failure detection
- Scrape Argos MCP application metrics from miranda
- Add Pallas dashboard panels for downstream availability and turn
  error ratios
2026-05-24 23:52:53 -04:00

95 lines
3.2 KiB
Django/Jinja

# Server-wide defaults: the scrape interval used by jobs that do not set
# their own, and the interval at which rules are evaluated.
# Both values are quoted so the rendered durations always reach Prometheus
# as strings, whatever the Ansible variables expand to.
global:
  scrape_interval: '{{ prometheus_scrape_interval }}'
  evaluation_interval: '{{ prometheus_evaluation_interval }}'
# Alert delivery: one statically configured Alertmanager endpoint built from
# alertmanager_host and alertmanager_port.
# The target is quoted (as it is in the 'alertmanager' scrape job) so the
# rendered host:port pair is always parsed as a single string.
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      - '{{ alertmanager_host }}:{{ alertmanager_port }}'
# Rule files loaded together with this configuration.
rule_files:
- 'alert_rules.yml'
scrape_configs:
# Scrapes localhost:9090 (presumably this Prometheus server's own metrics).
- job_name: 'prometheus'
  static_configs:
  - targets:
    - 'localhost:9090'
# node-exporter targets; the list is rendered as JSON from the
# prometheus_targets variable.
- job_name: 'node-exporter'
  static_configs:
  - targets: {{ prometheus_targets | to_json }}
# Scrapes the endpoint at alertmanager_host:alertmanager_port.
- job_name: 'alertmanager'
  static_configs:
  - targets:
    - '{{ alertmanager_host }}:{{ alertmanager_port }}'
# HAProxy metrics served by titania.incus on port 8404 at /metrics.
- job_name: 'haproxy'
  metrics_path: '/metrics'
  static_configs:
  - targets:
    - 'titania.incus:8404'
# Gitea metrics on oberon.incus:22084, authenticated with a bearer token
# taken from vault_gitea_metrics_token.
# The token is emitted through to_json (a JSON string is a valid YAML
# double-quoted scalar) so quote or backslash characters in the secret are
# escaped instead of breaking the rendered file.
- job_name: 'gitea'
  metrics_path: '/metrics'
  authorization:
    type: Bearer
    credentials: {{ vault_gitea_metrics_token | to_json }}
  static_configs:
  - targets:
    - 'oberon.incus:22084'
# Casdoor metrics at /api/metrics. The access key and secret are sent as the
# accessKey / accessSecret URL query parameters.
# Both values are emitted through to_json (a JSON string is a valid YAML
# double-quoted scalar) so quote or backslash characters in the secrets are
# escaped instead of breaking the rendered file.
- job_name: 'casdoor'
  metrics_path: '/api/metrics'
  params:
    accessKey:
    - {{ casdoor_prometheus_access_key | to_json }}
    accessSecret:
    - {{ casdoor_prometheus_access_secret | to_json }}
  static_configs:
  - targets:
    - '{{ casdoor_metrics_host }}:{{ casdoor_metrics_port }}'
# Daedalus metrics at /metrics, scraped every 15s (job-level override).
- job_name: 'daedalus'
  metrics_path: '/metrics'
  scrape_interval: 15s
  static_configs:
  - targets:
    - '{{ daedalus_metrics_host }}:{{ daedalus_metrics_port }}'
# Mnemosyne — two targets, told apart by the `component` label:
#   app: /metrics on the Django container (proxied via nginx). A single
#        prometheus_client process registry serves both django-prometheus
#        (HTTP/Celery) and the MCP server's tool-call counters; the mcp
#        container itself does not expose /metrics.
#   web: an nginx-prometheus-exporter sidecar that scrapes the web
#        container's stub_status and re-exposes it in Prometheus format.
- job_name: 'mnemosyne'
  scrape_interval: 15s
  metrics_path: '/metrics'
  static_configs:
  - labels:
      component: app
    targets:
    - '{{ mnemosyne_app_metrics_host }}:{{ mnemosyne_app_metrics_port }}'
  - labels:
      component: web
    targets:
    - '{{ mnemosyne_web_metrics_host }}:{{ mnemosyne_web_metrics_port }}'
# Pallas — one scrape target per deployment (the registry port). Pallas
# keeps a single process-global registry, so every per-agent /metrics
# endpoint returns the same snapshot; `agent` is therefore a metric label,
# not a target. The target groups are defined per environment in
# pallas_metrics_targets (host_vars on the Prometheus host) and are
# distinguished by the `instance` label.
# The whole job is left out when that variable is undefined or empty.
{% if pallas_metrics_targets | default([]) %}
- job_name: 'pallas'
  scrape_interval: 15s
  metrics_path: '/metrics'
  static_configs: {{ pallas_metrics_targets | to_json }}
{% endif %}
# Neo4j — a stscoundrel/neo4j-apoc-exporter sidecar connects to the local
# Neo4j over Bolt and exposes apoc.monitor.* (tx/ids/store) plus JVM
# metrics. Targets are listed per environment in neo4j_metrics_targets
# (host_vars on the Prometheus host); instances are differentiated by
# hostname only.
# Like the pallas job, this job is rendered only when the variable is
# defined and non-empty, so an environment without Neo4j targets still
# templates cleanly instead of failing on an undefined variable.
{% if neo4j_metrics_targets | default([]) %}
- job_name: 'neo4j'
  metrics_path: '/metrics'
  scrape_interval: 15s
  static_configs:
  - targets: {{ neo4j_metrics_targets | to_json }}
{% endif %}