Files
ouranos/ansible/pplg/deploy.yml
Robert Helewka 3c2f8c57ca feat(observability): add SearXNG, Argos, and Pallas monitoring
- Add SearXNG syslog ingestion and blackbox health probes on miranda
  and rosalind for per-host attributable failure detection
- Scrape Argos MCP application metrics from miranda
- Add Pallas dashboard panels for downstream availability and turn
  error ratios
2026-05-24 23:52:53 -04:00

394 lines
12 KiB
YAML

---
# PPLG - Consolidated Observability & Admin Stack for Prospero
# PgAdmin, Prometheus, Loki, Grafana + OAuth2-Proxy (Prometheus UI)
# TLS termination handled by Titania HAProxy
# Red Panda Approved
- name: Deploy PPLG Stack
  # Installs and configures PgAdmin, Prometheus (+ Alertmanager), Loki,
  # Grafana and an oauth2-proxy sidecar on every host in the `ubuntu` group
  # whose `services` list contains `pplg`. All other hosts are skipped.
  hosts: ubuntu
  become: true

  tasks:
    # `services` is defaulted to an empty list so that hosts which do not
    # define it are skipped below instead of failing on an undefined variable.
    - name: Check if host has pplg service
      ansible.builtin.set_fact:
        has_pplg_service: "{{ 'pplg' in (services | default([])) }}"

    - name: Skip hosts without pplg service
      ansible.builtin.meta: end_host
      when: not has_pplg_service

    # ===========================================================================
    # APT Repositories
    # ===========================================================================
    - name: Add Grafana APT repository (Grafana + Loki)
      ansible.builtin.deb822_repository:
        name: grafana
        types: [deb]
        uris: https://apt.grafana.com
        suites: [stable]
        components: [main]
        signed_by: https://apt.grafana.com/gpg.key
        state: present

    - name: Add PgAdmin APT repository
      ansible.builtin.deb822_repository:
        name: pgadmin4
        types: [deb]
        uris: https://ftp.postgresql.org/pub/pgadmin/pgadmin4/apt/{{ansible_distribution_release}}
        suites: [pgadmin4]
        components: [main]
        signed_by: https://www.pgadmin.org/static/packages_pgadmin_org.pub
        state: present

    # ===========================================================================
    # Package Installation
    # ===========================================================================
    # update_cache refreshes the index so the repositories added above are seen.
    - name: Install PPLG packages
      ansible.builtin.apt:
        name:
          - acl
          - prometheus
          - loki
          - grafana
          - pgadmin4-web
        state: present
        update_cache: true

    # PgAdmin is served by Gunicorn (see the PgAdmin section), so Apache is
    # kept stopped and disabled.
    - name: Stop and disable Apache (pulled in by pgadmin4-web)
      ansible.builtin.systemd:
        name: apache2
        state: stopped
        enabled: false

    # ===========================================================================
    # Prometheus
    # ===========================================================================
    # Symbolic mode with a capital X: the recursion grants search permission on
    # directories without marking every regular data file as executable, which
    # a numeric 750 applied recursively would do.
    - name: Fix Prometheus directory permissions
      ansible.builtin.file:
        path: /var/lib/prometheus
        owner: prometheus
        group: prometheus
        mode: 'u=rwX,g=rX,o='
        recurse: true

    - name: Create textfile collector directory
      ansible.builtin.file:
        path: /var/lib/prometheus/node-exporter
        state: directory
        owner: prometheus
        group: prometheus
        mode: '750'

    - name: Template prometheus.yml
      ansible.builtin.template:
        src: prometheus.yml.j2
        dest: /etc/prometheus/prometheus.yml
        owner: prometheus
        group: prometheus
        mode: '640'
      notify: restart prometheus

    - name: Template alert_rules.yml
      ansible.builtin.template:
        src: alert_rules.yml.j2
        dest: /etc/prometheus/alert_rules.yml
        owner: prometheus
        group: prometheus
        mode: '640'
      notify: restart prometheus

    - name: Create Prometheus systemd override directory
      ansible.builtin.file:
        path: /etc/systemd/system/prometheus.service.d
        state: directory
        mode: '755'

    # Drop-in unit override. The empty `ExecStart=` line clears the command
    # from the packaged unit before the replacement, which adds
    # --web.enable-remote-write-receiver, is declared.
    - name: Enable remote write receiver
      ansible.builtin.copy:
        content: |
          [Service]
          ExecStart=
          ExecStart=/usr/bin/prometheus --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/var/lib/prometheus/metrics2/ --web.console.templates=/etc/prometheus/consoles --web.console.libraries=/etc/prometheus/console_libraries --web.listen-address=0.0.0.0:9090 --web.external-url= --web.enable-remote-write-receiver
        dest: /etc/systemd/system/prometheus.service.d/override.conf
        mode: '644'
      notify: restart prometheus

    # daemon_reload makes systemd pick up the drop-in written above.
    - name: Start and enable Prometheus service
      ansible.builtin.systemd:
        name: prometheus
        state: started
        enabled: true
        daemon_reload: true

    # ===========================================================================
    # Prometheus Alertmanager
    # ===========================================================================
    - name: Install Alertmanager
      ansible.builtin.apt:
        name: prometheus-alertmanager
        state: present

    - name: Create alertmanager configuration directory
      ansible.builtin.file:
        path: /etc/alertmanager
        state: directory
        owner: prometheus
        group: prometheus
        mode: '750'

    - name: Template alertmanager.yml
      ansible.builtin.template:
        src: alertmanager.yml.j2
        dest: /etc/alertmanager/alertmanager.yml
        owner: prometheus
        group: prometheus
        mode: '640'
      notify: restart alertmanager

    - name: Start and enable Alertmanager service
      ansible.builtin.systemd:
        name: prometheus-alertmanager
        state: started
        enabled: true
        daemon_reload: true

    # ===========================================================================
    # Loki
    # ===========================================================================
    - name: Create loki group
      ansible.builtin.group:
        name: "{{loki_group}}"

    - name: Create loki user
      ansible.builtin.user:
        name: "{{loki_user}}"
        comment: "{{loki_user}}"
        group: "{{loki_group}}"
        system: true

    - name: Create loki directories
      ansible.builtin.file:
        path: "{{item}}"
        owner: "{{loki_user}}"
        group: "{{loki_group}}"
        state: directory
        mode: '750'
      loop:
        - "{{loki_data_dir}}"
        - "{{loki_config_dir}}"

    # A configuration file needs no execute bit; 0640 matches the other
    # templated configuration files in this play.
    - name: Template Loki configuration
      ansible.builtin.template:
        src: "{{loki_config_file}}.j2"
        dest: "{{loki_config_dir}}/{{loki_config_file}}"
        owner: "{{loki_user}}"
        group: "{{loki_group}}"
        mode: '0640'
      notify: restart loki

    - name: Enable and start Loki service
      ansible.builtin.systemd:
        name: loki
        enabled: true
        state: started

    # ===========================================================================
    # Grafana
    # ===========================================================================
    # grafana.ini is only managed when grafana_oauth_enabled is truthy;
    # otherwise the file already on the host is left untouched.
    - name: Template Grafana main configuration
      ansible.builtin.template:
        src: "grafana.ini.j2"
        dest: "/etc/grafana/grafana.ini"
        owner: grafana
        group: grafana
        mode: '640'
      when: grafana_oauth_enabled | default(false)
      notify: restart grafana

    - name: Enable and start Grafana service
      ansible.builtin.systemd:
        name: grafana-server
        enabled: true
        state: started
        daemon_reload: true

    # ===========================================================================
    # PgAdmin (Gunicorn - no Apache)
    # ===========================================================================
    - name: Create pgadmin group
      ansible.builtin.group:
        name: "{{pgadmin_group}}"
        system: true

    - name: Create pgadmin user
      ansible.builtin.user:
        name: "{{pgadmin_user}}"
        comment: "PgAdmin Service"
        group: "{{pgadmin_group}}"
        system: true
        create_home: false
        shell: /usr/sbin/nologin

    - name: Create PgAdmin directories
      ansible.builtin.file:
        path: "{{item}}"
        state: directory
        owner: "{{pgadmin_user}}"
        group: "{{pgadmin_group}}"
        mode: '750'
      loop:
        - "{{pgadmin_data_dir}}"
        - "{{pgadmin_data_dir}}/sessions"
        - "{{pgadmin_data_dir}}/storage"
        - "{{pgadmin_data_dir}}/certs"
        - "{{pgadmin_log_dir}}"

    # Reported as changed only when pip's output says it installed something.
    - name: Install gunicorn into PgAdmin venv
      ansible.builtin.command:
        cmd: /usr/pgadmin4/venv/bin/pip install gunicorn
      register: pip_gunicorn
      changed_when: "'Successfully installed' in pip_gunicorn.stdout"

    - name: Template PgAdmin local config
      ansible.builtin.template:
        src: config_local.py.j2
        dest: /usr/pgadmin4/web/config_local.py
        owner: "{{pgadmin_user}}"
        group: "{{pgadmin_group}}"
        mode: '640'
      notify: restart pgadmin

    # Two-step transfer: fetch pulls the certificate from titania.incus onto
    # the control node, then the copy task pushes it to the PgAdmin host.
    - name: Fetch Titania PostgreSQL SSL cert
      ansible.builtin.fetch:
        src: /etc/postgresql/17/main/ssl/server.crt
        dest: /tmp/titania-postgres-ca.crt
        flat: true
      delegate_to: titania.incus
      when: "'titania.incus' in groups['ubuntu']"

    - name: Copy Titania PostgreSQL SSL cert to PgAdmin
      ansible.builtin.copy:
        src: /tmp/titania-postgres-ca.crt
        dest: "{{pgadmin_data_dir}}/certs/titania-postgres-ca.crt"
        owner: "{{pgadmin_user}}"
        group: "{{pgadmin_group}}"
        mode: '0644'
      when: "'titania.incus' in groups['ubuntu']"

    - name: Template PgAdmin systemd service
      ansible.builtin.template:
        src: pgadmin.service.j2
        dest: /etc/systemd/system/pgadmin.service
        owner: root
        group: root
        mode: '0644'
      notify: restart pgadmin

    # Without this the unit is only ever started by the restart handler and
    # is never enabled, so PgAdmin would not come back after a reboot.
    - name: Enable and start PgAdmin service
      ansible.builtin.systemd:
        name: pgadmin
        enabled: true
        state: started
        daemon_reload: true

    # ===========================================================================
    # OAuth2-Proxy Sidecar (Prometheus UI)
    # ===========================================================================
    - name: Create oauth2-proxy config directory
      ansible.builtin.file:
        path: "{{prometheus_oauth2_proxy_dir}}"
        owner: root
        group: root
        state: directory
        mode: '0755'

    - name: Download oauth2-proxy binary
      ansible.builtin.get_url:
        url: "https://github.com/oauth2-proxy/oauth2-proxy/releases/download/v{{prometheus_oauth2_proxy_version}}/oauth2-proxy-v{{prometheus_oauth2_proxy_version}}.linux-amd64.tar.gz"
        dest: "/tmp/oauth2-proxy-v{{prometheus_oauth2_proxy_version}}.tar.gz"
        mode: '0644'

    # `creates` skips the extraction once the versioned binary exists.
    - name: Extract oauth2-proxy binary
      ansible.builtin.unarchive:
        src: "/tmp/oauth2-proxy-v{{prometheus_oauth2_proxy_version}}.tar.gz"
        dest: /tmp
        remote_src: true
        creates: "/tmp/oauth2-proxy-v{{prometheus_oauth2_proxy_version}}.linux-amd64/oauth2-proxy"

    # Notifies the restart handler so that a changed binary (for example after
    # bumping prometheus_oauth2_proxy_version) replaces the running process.
    - name: Install oauth2-proxy binary
      ansible.builtin.copy:
        src: "/tmp/oauth2-proxy-v{{prometheus_oauth2_proxy_version}}.linux-amd64/oauth2-proxy"
        dest: /usr/local/bin/oauth2-proxy
        owner: root
        group: root
        mode: '0755'
        remote_src: true
      notify: restart oauth2-proxy-prometheus

    # Mode 0600: the file is readable by root only.
    - name: Template oauth2-proxy configuration for Prometheus
      ansible.builtin.template:
        src: oauth2-proxy-prometheus.cfg.j2
        dest: "{{prometheus_oauth2_proxy_dir}}/oauth2-proxy.cfg"
        owner: root
        group: root
        mode: '0600'
      notify: restart oauth2-proxy-prometheus

    - name: Template oauth2-proxy systemd service for Prometheus
      ansible.builtin.template:
        src: oauth2-proxy-prometheus.service.j2
        dest: /etc/systemd/system/oauth2-proxy-prometheus.service
        owner: root
        group: root
        mode: '0644'
      notify:
        - reload systemd
        - restart oauth2-proxy-prometheus

    - name: Enable and start OAuth2-Proxy for Prometheus
      ansible.builtin.systemd:
        name: oauth2-proxy-prometheus
        enabled: true
        state: started
        daemon_reload: true

  # ===========================================================================
  # Handlers
  # ===========================================================================
  # Handlers run in the order they are listed here, not the order in which
  # they were notified, so `reload systemd` must stay above
  # `restart oauth2-proxy-prometheus`.
  handlers:
    - name: restart prometheus
      ansible.builtin.systemd:
        name: prometheus
        state: restarted
        daemon_reload: true

    - name: restart alertmanager
      ansible.builtin.systemd:
        name: prometheus-alertmanager
        state: restarted

    - name: restart loki
      ansible.builtin.systemd:
        name: loki
        state: restarted

    - name: restart grafana
      ansible.builtin.systemd:
        name: grafana-server
        state: restarted

    - name: restart pgadmin
      ansible.builtin.systemd:
        name: pgadmin
        state: restarted
        daemon_reload: true

    - name: reload systemd
      ansible.builtin.systemd:
        daemon_reload: true

    - name: restart oauth2-proxy-prometheus
      ansible.builtin.systemd:
        name: oauth2-proxy-prometheus
        state: restarted