docs: rewrite README with structured overview and quick start guide

Replaces the minimal project description with a comprehensive README
including a component overview table, quick start instructions, common
Ansible operations, and links to detailed documentation. Aligns with
Red Panda Approval™ standards.
This commit is contained in:
2026-03-03 12:49:06 +00:00
parent c7be03a743
commit b4d60f2f38
219 changed files with 34586 additions and 2 deletions

View File

@@ -0,0 +1,249 @@
# Prometheus Alert Rules
# Red Panda Approved 🐼
# Deployed to: /etc/prometheus/alert_rules.yml
# The {% raw %} block stops Ansible's Jinja2 from touching the Go-template
# {{ ... }} expressions; Prometheus expands those at alert evaluation time.
{% raw %}
groups:
  # ============================================================================
  # Node/Infrastructure Alerts
  # ============================================================================
  - name: node_alerts
    rules:
      # Scrape target unreachable for 2 minutes.
      - alert: InstanceDown
        expr: up == 0
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: "Instance {{ $labels.instance }} is down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."
      # CPU: warning at >80% for 5m, critical at >95% for 2m
      # (100 minus mean per-instance idle rate).
      - alert: HighCPUUsage
        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High CPU usage on {{ $labels.instance }}"
          description: "CPU usage is above 80% on {{ $labels.instance }} (current value: {{ $value | printf \"%.1f\" }}%)"
      - alert: CriticalCPUUsage
        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 95
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: "Critical CPU usage on {{ $labels.instance }}"
          description: "CPU usage is above 95% on {{ $labels.instance }} (current value: {{ $value | printf \"%.1f\" }}%)"
      # Memory: warning >80%, critical >95%, based on MemAvailable.
      - alert: HighMemoryUsage
        expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High memory usage on {{ $labels.instance }}"
          description: "Memory usage is above 80% on {{ $labels.instance }} (current value: {{ $value | printf \"%.1f\" }}%)"
      - alert: CriticalMemoryUsage
        expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 95
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: "Critical memory usage on {{ $labels.instance }}"
          description: "Memory usage is above 95% on {{ $labels.instance }} (current value: {{ $value | printf \"%.1f\" }}%)"
      # Disk: tmpfs/overlay excluded; warning <20% free, critical <10% free.
      - alert: DiskSpaceLow
        expr: (node_filesystem_avail_bytes{fstype!~"tmpfs|overlay"} / node_filesystem_size_bytes{fstype!~"tmpfs|overlay"}) * 100 < 20
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Low disk space on {{ $labels.instance }}"
          description: "Disk {{ $labels.mountpoint }} on {{ $labels.instance }} has less than 20% free space (current value: {{ $value | printf \"%.1f\" }}%)"
      - alert: DiskSpaceCritical
        expr: (node_filesystem_avail_bytes{fstype!~"tmpfs|overlay"} / node_filesystem_size_bytes{fstype!~"tmpfs|overlay"}) * 100 < 10
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: "Critical disk space on {{ $labels.instance }}"
          description: "Disk {{ $labels.mountpoint }} on {{ $labels.instance }} has less than 10% free space (current value: {{ $value | printf \"%.1f\" }}%)"
      # 15-minute load normalized by CPU count; >2x cores sustained 10m.
      - alert: HighLoadAverage
        expr: node_load15 / count without(cpu, mode) (node_cpu_seconds_total{mode="idle"}) > 2
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "High load average on {{ $labels.instance }}"
          description: "15-minute load average is {{ $value | printf \"%.2f\" }} times the CPU count on {{ $labels.instance }}"
# ============================================================================
# Process-Level Alerts (puck.incus)
# ============================================================================
- name: puck_process_alerts
rules:
- alert: PuckHighCPUProcess
expr: sum by (groupname, instance) (rate(namedprocess_namegroup_cpu_seconds_total{instance=~"puck.*"}[2m])) * 100 > 80
for: 2m
labels:
severity: warning
annotations:
summary: "High CPU process on puck: {{ $labels.groupname }}"
description: "Process {{ $labels.groupname }} is using {{ $value | printf \"%.1f\" }}% CPU for more than 2 minutes"
- alert: PuckCriticalCPUProcess
expr: sum by (groupname, instance) (rate(namedprocess_namegroup_cpu_seconds_total{instance=~"puck.*"}[1m])) * 100 > 95
for: 1m
labels:
severity: critical
annotations:
summary: "Critical CPU process on puck: {{ $labels.groupname }}"
description: "Process {{ $labels.groupname }} is using {{ $value | printf \"%.1f\" }}% CPU - immediate attention required"
- alert: PuckHighMemoryProcess
expr: namedprocess_namegroup_memory_bytes{memtype="resident", instance=~"puck.*"} > 1073741824
for: 2m
labels:
severity: warning
annotations:
summary: "High memory process on puck: {{ $labels.groupname }}"
description: "Process {{ $labels.groupname }} is using {{ $value | humanize }} resident memory"
- alert: PuckCriticalMemoryProcess
expr: namedprocess_namegroup_memory_bytes{memtype="resident", instance=~"puck.*"} > 2147483648
for: 1m
labels:
severity: critical
annotations:
summary: "Critical memory process on puck: {{ $labels.groupname }}"
description: "Process {{ $labels.groupname }} is using {{ $value | humanize }} resident memory - immediate attention required"
- alert: PuckProcessCrashLoop
expr: increase(namedprocess_namegroup_num_procs{instance=~"puck.*"}[5m]) < -1
for: 1m
labels:
severity: warning
annotations:
summary: "Process count dropped on puck: {{ $labels.groupname }}"
description: "Process {{ $labels.groupname }} count has decreased, indicating possible crash or restart"
  # ============================================================================
  # Docker Container Alerts (puck.incus)
  # ============================================================================
  - name: puck_container_alerts
    rules:
      # More than 5 running containers (cAdvisor series with non-empty name).
      - alert: PuckHighContainerCount
        expr: count(container_last_seen{instance=~"puck.*", name!=""}) > 5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High container count on puck"
          description: "puck.incus has {{ $value }} running containers, which exceeds the threshold of 5"
      # More than 2 containers sharing one image.
      - alert: PuckDuplicateContainers
        expr: count by (image, instance) (container_last_seen{instance=~"puck.*", name!=""}) > 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Duplicate containers on puck: {{ $labels.image }}"
          description: "{{ $value }} containers running the same image {{ $labels.image }} on puck"
      # Containers whose name matches the auto-generated word_word pattern,
      # running longer than 1 hour.
      - alert: PuckOrphanedContainer
        expr: (time() - container_start_time_seconds{instance=~"puck.*", name=~".*_.*"}) > 3600
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Possible orphaned container on puck: {{ $labels.name }}"
          description: "Container {{ $labels.name }} with auto-generated name has been running for {{ $value | humanizeDuration }}"
      # No comparison operator: any series matching the selector fires the
      # alert (value is the last-seen timestamp).
      - alert: PuckMCPContainerOnPuck
        expr: container_last_seen{instance=~"puck.*", image=~".*mcp-server.*|.*mcp_server.*"}
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "MCP container detected on puck (WRONG HOST)"
          description: "Container {{ $labels.name }} ({{ $labels.image }}) is running on puck but MCP servers should run on miranda.incus"
      - alert: PuckContainerHighCPU
        expr: sum by (name, instance) (rate(container_cpu_usage_seconds_total{instance=~"puck.*", name!=""}[2m])) * 100 > 80
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High CPU container on puck: {{ $labels.name }}"
          description: "Container {{ $labels.name }} is using {{ $value | printf \"%.1f\" }}% CPU"
      # 1 GiB resident threshold per container.
      - alert: PuckContainerHighMemory
        expr: container_memory_usage_bytes{instance=~"puck.*", name!=""} > 1073741824
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High memory container on puck: {{ $labels.name }}"
          description: "Container {{ $labels.name }} is using {{ $value | humanize }} memory"
      # for: 0m — fire immediately on any OOM kill seen in the last 5 minutes.
      - alert: PuckContainerOOMKilled
        expr: increase(container_oom_events_total{instance=~"puck.*", name!=""}[5m]) > 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: "Container OOM killed on puck: {{ $labels.name }}"
          description: "Container {{ $labels.name }} was killed by OOM killer"
  # ============================================================================
  # Service/Application Alerts
  # ============================================================================
  - name: service_alerts
    rules:
      # NOTE(review): identical expression to node_alerts/InstanceDown
      # (up == 0), differing only in hold time and severity. Both alerts will
      # fire for the same outage, and the inhibit rule (keyed on alertname)
      # will not suppress either — confirm the double notification is intended.
      - alert: PrometheusTargetMissing
        expr: up == 0
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Prometheus target missing: {{ $labels.instance }}"
          description: "A Prometheus target has been down for more than 5 minutes."
      # absent() fires when the job has no `up` series at all (removed from
      # service discovery), as opposed to a scraped-but-down target.
      - alert: PrometheusJobMissing
        expr: absent(up{job="node-exporter"})
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Prometheus job missing"
          description: "A Prometheus job has disappeared from target discovery."
      - alert: AlertmanagerDown
        expr: absent(up{job="alertmanager"})
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Alertmanager is down"
          description: "Alertmanager is not responding. Alerts may not be delivered."
  # ============================================================================
  # Loki/Logging Alerts
  # ============================================================================
  - name: loki_alerts
    rules:
      # Sustained ingest above 10 MiB/s (10485760 bytes) across all streams.
      - alert: LokiHighLogVolume
        expr: sum(rate(loki_distributor_bytes_received_total[5m])) > 10485760
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "High log ingestion rate"
          description: "Loki is receiving logs at {{ $value | humanize }}/s which may indicate excessive logging"

# Red Panda Seal of Approval 🐼
# "If the metrics aren't red, go back to bed"
{% endraw %}

View File

@@ -0,0 +1,148 @@
# Alertmanager configuration (Jinja2 template).
# The {{ "{{" }} / {{ "}}" }} pairs render literal {{ }} so Alertmanager's own
# Go templating receives them intact after Ansible renders this file.
global:
  resolve_timeout: 5m
  smtp_smarthost: '{{ smtp_host }}:{{ smtp_port }}'
  smtp_from: '{{ smtp_from }}'
  # smtp4dev test relay — no TLS.
  smtp_require_tls: false

# Route tree: everything falls through to the catch-all 'email' receiver;
# severity-specific routes match first and 'continue: true' keeps evaluating.
route:
  group_by: ['alertname', 'instance', 'severity']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 4h
  receiver: 'email'
  routes:
    - match:
        severity: critical
      receiver: 'email-critical'
      continue: true
    - match:
        severity: warning
      receiver: 'email-warning'
      continue: true
    - match:
        severity: info
      receiver: 'email-info'
      repeat_interval: 24h

# Suppress warning-level duplicates while the critical alert for the same
# alertname/instance pair is firing.
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'instance']

# FIX: the original set `html: true` on each email_config. In Alertmanager,
# email_config `html` is a *template string* (the HTML body), not a boolean,
# so a bare bool fails config loading. Omitting the key keeps the default
# HTML email template, which is what `html: true` intended.
receivers:
  - name: 'email-critical'
    email_configs:
      - to: 'hostmaster+critical@ouranos.helu.ca'
        send_resolved: true
        headers:
          Subject: '🚨 [CRITICAL] {{ "{{" }} .GroupLabels.alertname {{ "}}" }}'
        text: |-
          {{ "{{" }} range .Alerts {{ "}}" }}
          {{ "{{" }} .Annotations.description {{ "}}" }}
          Instance: {{ "{{" }} .Labels.instance {{ "}}" }}
          {{ "{{" }} end {{ "}}" }}
  - name: 'email-warning'
    email_configs:
      - to: 'hostmaster+warning@ouranos.helu.ca'
        send_resolved: true
        headers:
          Subject: '⚠️ [WARNING] {{ "{{" }} .GroupLabels.alertname {{ "}}" }}'
        text: |-
          {{ "{{" }} range .Alerts {{ "}}" }}
          {{ "{{" }} .Annotations.description {{ "}}" }}
          Instance: {{ "{{" }} .Labels.instance {{ "}}" }}
          {{ "{{" }} end {{ "}}" }}
  - name: 'email-info'
    email_configs:
      - to: 'hostmaster+info@ouranos.helu.ca'
        send_resolved: false
        headers:
          Subject: '{{ "{{" }} .GroupLabels.alertname {{ "}}" }}'
        text: '{{ "{{" }} range .Alerts {{ "}}" }}{{ "{{" }} .Annotations.description {{ "}}" }}{{ "{{" }} end {{ "}}" }}'
  - name: 'email'
    email_configs:
      - to: 'hostmaster+alerts@ouranos.helu.ca'
        send_resolved: true
        headers:
          Subject: '[{{ "{{" }} .GroupLabels.severity | default "ALERT" {{ "}}" }}] {{ "{{" }} .GroupLabels.alertname {{ "}}" }}'
        text: |-
          {{ "{{" }} range .Alerts {{ "}}" }}
          {{ "{{" }} .Annotations.description {{ "}}" }}
          Instance: {{ "{{" }} .Labels.instance {{ "}}" }}
          Severity: {{ "{{" }} .Labels.severity {{ "}}" }}
          {{ "{{" }} end {{ "}}" }}

# --- Pushover receivers (disabled for smtp4dev testing) ---
# To re-enable: uncomment these receivers and update the route receiver names
# from email-*/email back to pushover-*/pushover
#
# - name: 'pushover-critical'
#   pushover_configs:
#     - user_key: '{{ pushover_user_key }}'
#       token: '{{ pushover_api_token }}'
#       send_resolved: true
#       html: true
#       priority: '2'
#       retry: 30
#       expire: 3600
#       title: '🚨 [CRITICAL] {{ "{{" }} .GroupLabels.alertname {{ "}}" }}'
#       message: |-
#         {{ "{{" }} range .Alerts {{ "}}" }}
#         {{ "{{" }} .Annotations.description {{ "}}" }}
#         Instance: {{ "{{" }} .Labels.instance {{ "}}" }}
#         {{ "{{" }} end {{ "}}" }}
#
# - name: 'pushover-warning'
#   pushover_configs:
#     - user_key: '{{ pushover_user_key }}'
#       token: '{{ pushover_api_token }}'
#       send_resolved: true
#       html: true
#       priority: '1'
#       retry: 30
#       expire: 3600
#       title: '⚠️ [WARNING] {{ "{{" }} .GroupLabels.alertname {{ "}}" }}'
#       message: |-
#         {{ "{{" }} range .Alerts {{ "}}" }}
#         {{ "{{" }} .Annotations.description {{ "}}" }}
#         Instance: {{ "{{" }} .Labels.instance {{ "}}" }}
#         {{ "{{" }} end {{ "}}" }}
#
# - name: 'pushover-info'
#   pushover_configs:
#     - user_key: '{{ pushover_user_key }}'
#       token: '{{ pushover_api_token }}'
#       send_resolved: false
#       html: true
#       priority: '0'
#       title: '{{ "{{" }} .GroupLabels.alertname {{ "}}" }}'
#       message: '{{ "{{" }} range .Alerts {{ "}}" }}{{ "{{" }} .Annotations.description {{ "}}" }}{{ "{{" }} end {{ "}}" }}'
#
# - name: 'pushover'
#   pushover_configs:
#     - user_key: '{{ pushover_user_key }}'
#       token: '{{ pushover_api_token }}'
#       send_resolved: true
#       html: true
#       priority: '1'
#       retry: 30
#       expire: 3600
#       title: '[{{ "{{" }} .GroupLabels.severity | default "ALERT" {{ "}}" }}] {{ "{{" }} .GroupLabels.alertname {{ "}}" }}'
#       message: |-
#         {{ "{{" }} range .Alerts {{ "}}" }}
#         {{ "{{" }} .Annotations.description {{ "}}" }}
#         Instance: {{ "{{" }} .Labels.instance {{ "}}" }}
#         Severity: {{ "{{" }} .Labels.severity {{ "}}" }}
#         {{ "{{" }} end {{ "}}" }}

View File

@@ -0,0 +1,41 @@
# Loki single-binary configuration: filesystem storage, no multi-tenancy.
auth_enabled: false

server:
  http_listen_port: {{ loki_port }}
  grpc_listen_port: {{ loki_grpc_port }}

common:
  path_prefix: {{ loki_data_dir }}
  storage:
    filesystem:
      chunks_directory: {{ loki_data_dir }}/chunks
      rules_directory: {{ loki_data_dir }}/rules
  # Single-node deployment: one copy of each chunk, in-memory ring.
  replication_factor: 1
  ring:
    instance_addr: 127.0.0.1
    kvstore:
      store: inmemory

query_range:
  results_cache:
    cache:
      embedded_cache:
        enabled: true
        max_size_mb: 100

# TSDB index with v13 schema, one index file per day.
schema_config:
  configs:
    - from: 2024-04-01
      object_store: filesystem
      store: tsdb
      schema: v13
      index:
        prefix: index_
        period: 24h

# Ruler sends rule-evaluation notifications to Alertmanager.
ruler:
  alertmanager_url: http://{{ alertmanager_host }}:{{ alertmanager_port }}

# Red Panda Approved Configuration
analytics:
  reporting_enabled: false

View File

@@ -0,0 +1,55 @@
# PgAdmin4 Local Configuration - Managed by Ansible
# Gunicorn-based deployment (no Apache) with Casdoor OAuth SSO
# Red Panda Approved
# Jinja2 template: {{ ... }} placeholders are rendered by Ansible before this
# file is installed as /usr/pgadmin4/web/config_local.py.
import os

# Server settings
DEFAULT_SERVER = '0.0.0.0'
DEFAULT_SERVER_PORT = {{pgadmin_port}}

# Data directory
DATA_DIR = '{{pgadmin_data_dir}}'
SESSION_DB_PATH = os.path.join(DATA_DIR, 'sessions')
STORAGE_DIR = os.path.join(DATA_DIR, 'storage')
SQLITE_PATH = os.path.join(DATA_DIR, 'pgadmin4.db')

# Log settings
LOG_FILE = '{{pgadmin_log_dir}}/pgadmin4.log'

# Default admin credentials (for initial setup)
SETUP_EMAIL = '{{pgadmin_email}}'
SETUP_PASSWORD = '{{pgadmin_password}}'

# Authentication - OAuth2 (Casdoor) + internal fallback
AUTHENTICATION_SOURCES = ['oauth2', 'internal']

# Master password disabled (use OAuth)
MASTER_PASSWORD_REQUIRED = False

# Reverse proxy settings (Titania HAProxy -> Prospero HAProxy -> Gunicorn)
# The *_COUNT values declare how many X-Forwarded-* proxy hops to trust;
# 2 matches the two HAProxy layers named above.
ENHANCED_COOKIE_PROTECTION = False
PROXY_X_FOR_COUNT = 2
PROXY_X_PROTO_COUNT = 2
PROXY_X_HOST_COUNT = 2
X_FRAME_OPTIONS = 'SAMEORIGIN'
SESSION_COOKIE_SECURE = True
SESSION_COOKIE_SAMESITE = 'Lax'
# NOTE(review): CSRF SSL-strict disabled — presumably because TLS terminates
# at HAProxy upstream of Gunicorn; confirm.
WTF_CSRF_SSL_STRICT = False

# OAuth2 Configuration (Casdoor OIDC)
OAUTH2_AUTO_CREATE_USER = True
OAUTH2_CONFIG = [{
    'OAUTH2_NAME': 'Casdoor',
    'OAUTH2_DISPLAY_NAME': 'Casdoor SSO',
    'OAUTH2_CLIENT_ID': '{{pgadmin_oauth_client_id}}',
    'OAUTH2_CLIENT_SECRET': '{{pgadmin_oauth_client_secret}}',
    'OAUTH2_TOKEN_URL': 'https://id.ouranos.helu.ca/api/login/oauth/access_token',
    'OAUTH2_AUTHORIZATION_URL': 'https://id.ouranos.helu.ca/login/oauth/authorize',
    'OAUTH2_API_BASE_URL': 'https://id.ouranos.helu.ca/',
    'OAUTH2_USERINFO_ENDPOINT': 'api/userinfo',
    'OAUTH2_SERVER_METADATA_URL': 'https://id.ouranos.helu.ca/.well-known/openid-configuration',
    'OAUTH2_SCOPE': 'openid profile email',
    'OAUTH2_ICON': 'fa-openid',
    'OAUTH2_BUTTON_COLOR': '#2db7f5',
}]

View File

@@ -0,0 +1,15 @@
# Grafana datasource provisioning (Jinja2 template, rendered by Ansible).
# FIX: templated scalar values are now quoted — an unquoted {{ var }} that
# renders empty, boolean-looking, or with YAML specials would corrupt the
# provisioned YAML (Ansible/YAML best practice).
apiVersion: 1
datasources:
  # Prometheus — the default datasource; locked (editable: false).
  - name: "{{prometheus_datasource_name}}"
    type: prometheus
    access: proxy
    url: "http://{{prometheus_host}}:{{prometheus_port}}"
    isDefault: true
    editable: false
    uid: "{{prometheus_datasource_uid}}"
  # Loki — log datasource.
  - name: "{{loki_datasource_name}}"
    type: loki
    access: proxy
    url: "http://{{loki_host}}:{{loki_port}}"
    editable: false
    uid: "{{loki_datasource_uid}}"

495
ansible/pplg/deploy.yml Normal file
View File

@@ -0,0 +1,495 @@
---
# PPLG - Consolidated Observability & Admin Stack for Prospero
# PgAdmin, Prometheus, Loki, Grafana + HAProxy (TLS) + OAuth2-Proxy (Prometheus UI)
# Red Panda Approved
- name: Deploy PPLG Stack
  hosts: ubuntu
  become: true
  tasks:
    # Only hosts whose inventory 'services' list contains 'pplg' proceed;
    # all others end the play for that host immediately.
    - name: Check if host has pplg service
      ansible.builtin.set_fact:
        has_pplg_service: "{{'pplg' in services}}"
    - name: Skip hosts without pplg service
      ansible.builtin.meta: end_host
      when: not has_pplg_service
    # ===========================================================================
    # APT Repositories
    # ===========================================================================
    - name: Add Grafana APT repository (Grafana + Loki)
      ansible.builtin.deb822_repository:
        name: grafana
        types: [deb]
        uris: https://apt.grafana.com
        suites: [stable]
        components: [main]
        signed_by: https://apt.grafana.com/gpg.key
        state: present
    - name: Add PgAdmin APT repository
      ansible.builtin.deb822_repository:
        name: pgadmin4
        types: [deb]
        uris: https://ftp.postgresql.org/pub/pgadmin/pgadmin4/apt/{{ansible_distribution_release}}
        suites: [pgadmin4]
        components: [main]
        signed_by: https://www.pgadmin.org/static/packages_pgadmin_org.pub
        state: present
    # ===========================================================================
    # Package Installation
    # ===========================================================================
    - name: Install PPLG packages
      ansible.builtin.apt:
        name:
          - acl
          - haproxy
          - prometheus
          - loki
          - grafana
          - pgadmin4-web
        state: present
        update_cache: true
    # pgadmin4-web drags in Apache; this stack serves PgAdmin via Gunicorn
    # instead (see the PgAdmin section), so Apache is stopped and disabled.
    - name: Stop and disable Apache (pulled in by pgadmin4-web)
      ansible.builtin.systemd:
        name: apache2
        state: stopped
        enabled: false
    # ===========================================================================
    # Prometheus
    # ===========================================================================
    - name: Fix Prometheus directory permissions
      ansible.builtin.file:
        path: /var/lib/prometheus
        owner: prometheus
        group: prometheus
        mode: '750'
        recurse: true
    - name: Create textfile collector directory
      ansible.builtin.file:
        path: /var/lib/prometheus/node-exporter
        state: directory
        owner: prometheus
        group: prometheus
        mode: '750'
    - name: Template prometheus.yml
      ansible.builtin.template:
        src: prometheus.yml.j2
        dest: /etc/prometheus/prometheus.yml
        owner: prometheus
        group: prometheus
        mode: '640'
      notify: restart prometheus
    - name: Template alert_rules.yml
      ansible.builtin.template:
        src: alert_rules.yml.j2
        dest: /etc/prometheus/alert_rules.yml
        owner: prometheus
        group: prometheus
        mode: '640'
      notify: restart prometheus
    - name: Create Prometheus systemd override directory
      ansible.builtin.file:
        path: /etc/systemd/system/prometheus.service.d
        state: directory
        mode: '755'
    # Drop-in override: the empty ExecStart= first clears the packaged unit's
    # command, then the full command line re-adds it with
    # --web.enable-remote-write-receiver appended.
    - name: Enable remote write receiver
      ansible.builtin.copy:
        content: |
          [Service]
          ExecStart=
          ExecStart=/usr/bin/prometheus --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/var/lib/prometheus/metrics2/ --web.console.templates=/etc/prometheus/consoles --web.console.libraries=/etc/prometheus/console_libraries --web.listen-address=0.0.0.0:9090 --web.external-url= --web.enable-remote-write-receiver
        dest: /etc/systemd/system/prometheus.service.d/override.conf
        mode: '644'
      notify: restart prometheus
    - name: Start and enable Prometheus service
      ansible.builtin.systemd:
        name: prometheus
        state: started
        enabled: true
        daemon_reload: true
    # ===========================================================================
    # Prometheus Alertmanager
    # ===========================================================================
    - name: Install Alertmanager
      ansible.builtin.apt:
        name: prometheus-alertmanager
        state: present
    - name: Create alertmanager configuration directory
      ansible.builtin.file:
        path: /etc/alertmanager
        state: directory
        owner: prometheus
        group: prometheus
        mode: '750'
    - name: Template alertmanager.yml
      ansible.builtin.template:
        src: alertmanager.yml.j2
        dest: /etc/alertmanager/alertmanager.yml
        owner: prometheus
        group: prometheus
        mode: '640'
      notify: restart alertmanager
    - name: Start and enable Alertmanager service
      ansible.builtin.systemd:
        name: prometheus-alertmanager
        state: started
        enabled: true
        daemon_reload: true
    # ===========================================================================
    # Loki
    # ===========================================================================
    - name: Create loki group
      ansible.builtin.group:
        name: "{{loki_group}}"
    - name: Create loki user
      ansible.builtin.user:
        name: "{{loki_user}}"
        comment: "{{loki_user}}"
        group: "{{loki_group}}"
        system: true
    - name: Create loki directories
      ansible.builtin.file:
        path: "{{item}}"
        owner: "{{loki_user}}"
        group: "{{loki_group}}"
        state: directory
        mode: '750'
      loop:
        - "{{loki_data_dir}}"
        - "{{loki_config_dir}}"
    # NOTE(review): mode '550' gives the config read+execute but not write —
    # confirm '640' wasn't intended for a plain YAML config file.
    - name: Template Loki configuration
      ansible.builtin.template:
        src: "{{loki_config_file}}.j2"
        dest: "{{loki_config_dir}}/{{loki_config_file}}"
        owner: "{{loki_user}}"
        group: "{{loki_group}}"
        mode: '550'
      notify: restart loki
    - name: Enable and start Loki service
      ansible.builtin.systemd:
        name: loki
        enabled: true
        state: started
    # ===========================================================================
    # Grafana
    # ===========================================================================
    - name: Create dashboards directory
      ansible.builtin.file:
        path: /var/lib/grafana/dashboards
        state: directory
        owner: grafana
        group: grafana
        mode: '750'
    # grafana.ini is only templated when OAuth is enabled; otherwise the
    # packaged default configuration is left untouched.
    - name: Template Grafana main configuration
      ansible.builtin.template:
        src: "grafana.ini.j2"
        dest: "/etc/grafana/grafana.ini"
        owner: grafana
        group: grafana
        mode: '640'
      when: grafana_oauth_enabled | default(false)
      notify: restart grafana
    - name: Enable and start Grafana service
      ansible.builtin.systemd:
        name: grafana-server
        enabled: true
        state: started
        daemon_reload: true
# ===========================================================================
# PgAdmin (Gunicorn - no Apache)
# ===========================================================================
- name: Create pgadmin group
ansible.builtin.group:
name: "{{pgadmin_group}}"
system: true
- name: Create pgadmin user
ansible.builtin.user:
name: "{{pgadmin_user}}"
comment: "PgAdmin Service"
group: "{{pgadmin_group}}"
system: true
create_home: false
shell: /usr/sbin/nologin
- name: Create PgAdmin directories
ansible.builtin.file:
path: "{{item}}"
state: directory
owner: "{{pgadmin_user}}"
group: "{{pgadmin_group}}"
mode: '750'
loop:
- "{{pgadmin_data_dir}}"
- "{{pgadmin_data_dir}}/sessions"
- "{{pgadmin_data_dir}}/storage"
- "{{pgadmin_data_dir}}/certs"
- "{{pgadmin_log_dir}}"
- name: Install gunicorn into PgAdmin venv
ansible.builtin.command:
cmd: /usr/pgadmin4/venv/bin/pip install gunicorn
register: pip_gunicorn
changed_when: "'Successfully installed' in pip_gunicorn.stdout"
- name: Initialize PgAdmin database
ansible.builtin.command:
cmd: /usr/pgadmin4/venv/bin/python3 /usr/pgadmin4/web/setup.py setup-db
creates: "{{pgadmin_data_dir}}/pgadmin4.db"
become_user: "{{pgadmin_user}}"
- name: Template PgAdmin local config
ansible.builtin.template:
src: config_local.py.j2
dest: /usr/pgadmin4/web/config_local.py
owner: "{{pgadmin_user}}"
group: "{{pgadmin_group}}"
mode: '640'
notify: restart pgadmin
- name: Fetch Titania PostgreSQL SSL cert
ansible.builtin.fetch:
src: /etc/postgresql/17/main/ssl/server.crt
dest: /tmp/titania-postgres-ca.crt
flat: yes
delegate_to: titania.incus
when: "'titania.incus' in groups['ubuntu']"
- name: Copy Titania PostgreSQL SSL cert to PgAdmin
ansible.builtin.copy:
src: /tmp/titania-postgres-ca.crt
dest: "{{pgadmin_data_dir}}/certs/titania-postgres-ca.crt"
owner: "{{pgadmin_user}}"
group: "{{pgadmin_group}}"
mode: '0644'
when: "'titania.incus' in groups['ubuntu']"
- name: Template PgAdmin systemd service
ansible.builtin.template:
src: pgadmin.service.j2
dest: /etc/systemd/system/pgadmin.service
owner: root
group: root
mode: '0644'
notify: restart pgadmin
- name: Enable and start PgAdmin service
ansible.builtin.systemd:
name: pgadmin
enabled: true
state: started
daemon_reload: true
    # ===========================================================================
    # OAuth2-Proxy Sidecar (Prometheus UI)
    # ===========================================================================
    - name: Create oauth2-proxy config directory
      ansible.builtin.file:
        path: "{{prometheus_oauth2_proxy_dir}}"
        owner: root
        group: root
        state: directory
        mode: '0755'
    # Binary install: download the release tarball, unpack under /tmp, then
    # copy the binary into /usr/local/bin. `creates:` keeps re-runs idempotent.
    - name: Download oauth2-proxy binary
      ansible.builtin.get_url:
        url: "https://github.com/oauth2-proxy/oauth2-proxy/releases/download/v{{prometheus_oauth2_proxy_version}}/oauth2-proxy-v{{prometheus_oauth2_proxy_version}}.linux-amd64.tar.gz"
        dest: "/tmp/oauth2-proxy-v{{prometheus_oauth2_proxy_version}}.tar.gz"
        mode: '0644'
    - name: Extract oauth2-proxy binary
      ansible.builtin.unarchive:
        src: "/tmp/oauth2-proxy-v{{prometheus_oauth2_proxy_version}}.tar.gz"
        dest: /tmp
        remote_src: true
        creates: "/tmp/oauth2-proxy-v{{prometheus_oauth2_proxy_version}}.linux-amd64/oauth2-proxy"
    - name: Install oauth2-proxy binary
      ansible.builtin.copy:
        src: "/tmp/oauth2-proxy-v{{prometheus_oauth2_proxy_version}}.linux-amd64/oauth2-proxy"
        dest: /usr/local/bin/oauth2-proxy
        owner: root
        group: root
        mode: '0755'
        remote_src: true
    # Config carries the OIDC client secret — root-only read (0600).
    - name: Template oauth2-proxy configuration for Prometheus
      ansible.builtin.template:
        src: oauth2-proxy-prometheus.cfg.j2
        dest: "{{prometheus_oauth2_proxy_dir}}/oauth2-proxy.cfg"
        owner: root
        group: root
        mode: '0600'
      notify: restart oauth2-proxy-prometheus
    - name: Template oauth2-proxy systemd service for Prometheus
      ansible.builtin.template:
        src: oauth2-proxy-prometheus.service.j2
        dest: /etc/systemd/system/oauth2-proxy-prometheus.service
        owner: root
        group: root
        mode: '0644'
      notify:
        - reload systemd
        - restart oauth2-proxy-prometheus
    - name: Enable and start OAuth2-Proxy for Prometheus
      ansible.builtin.systemd:
        name: oauth2-proxy-prometheus
        enabled: true
        state: started
        daemon_reload: true
# ===========================================================================
# SSL Certificate Distribution (from Titania)
# ===========================================================================
- name: Create haproxy group
ansible.builtin.group:
name: "{{pplg_haproxy_group}}"
gid: "{{pplg_haproxy_gid}}"
system: true
- name: Create haproxy user
ansible.builtin.user:
name: "{{pplg_haproxy_user}}"
comment: "PPLG HAProxy"
group: "{{pplg_haproxy_group}}"
uid: "{{pplg_haproxy_uid}}"
system: true
- name: Create HAProxy directories
ansible.builtin.file:
path: "{{item}}"
state: directory
owner: "{{pplg_haproxy_user}}"
group: "{{pplg_haproxy_group}}"
mode: '750'
loop:
- /etc/haproxy
- /etc/haproxy/certs
- name: Fetch wildcard certificate from Titania
ansible.builtin.fetch:
src: /etc/haproxy/certs/ouranos.pem
dest: /tmp/ouranos-haproxy.pem
flat: yes
delegate_to: titania.incus
when: "'titania.incus' in groups['ubuntu']"
- name: Deploy wildcard certificate
ansible.builtin.copy:
src: /tmp/ouranos-haproxy.pem
dest: "{{pplg_haproxy_cert_path}}"
owner: "{{pplg_haproxy_user}}"
group: "{{pplg_haproxy_group}}"
mode: '0640'
when: "'titania.incus' in groups['ubuntu']"
- name: Generate self-signed wildcard certificate (fallback)
command: >
openssl req -x509 -nodes -days 365 -newkey rsa:2048
-keyout {{pplg_haproxy_cert_path}}
-out {{pplg_haproxy_cert_path}}
-subj "/C=US/ST=State/L=City/O=Agathos/CN=*.{{pplg_haproxy_domain}}"
-addext "subjectAltName=DNS:*.{{pplg_haproxy_domain}},DNS:{{pplg_haproxy_domain}}"
when: "'titania.incus' not in groups['ubuntu']"
args:
creates: "{{pplg_haproxy_cert_path}}"
    # ===========================================================================
    # HAProxy (TLS Termination)
    # ===========================================================================
    # validate: runs a config check on the rendered file before it replaces
    # the live config, so a bad template never takes HAProxy down.
    - name: Template HAProxy configuration
      ansible.builtin.template:
        src: pplg-haproxy.cfg.j2
        dest: /etc/haproxy/haproxy.cfg
        owner: "{{pplg_haproxy_user}}"
        group: "{{pplg_haproxy_group}}"
        mode: "640"
        validate: haproxy -c -f %s
      notify: restart haproxy
    - name: Enable and start HAProxy service
      ansible.builtin.systemd:
        name: haproxy
        enabled: true
        state: started
  # ===========================================================================
  # Handlers
  # ===========================================================================
  handlers:
    - name: restart prometheus
      ansible.builtin.systemd:
        name: prometheus
        state: restarted
        daemon_reload: true
    - name: restart alertmanager
      ansible.builtin.systemd:
        name: prometheus-alertmanager
        state: restarted
    - name: restart loki
      ansible.builtin.systemd:
        name: loki
        state: restarted
    - name: restart grafana
      ansible.builtin.systemd:
        name: grafana-server
        state: restarted
    - name: restart pgadmin
      ansible.builtin.systemd:
        name: pgadmin
        state: restarted
        daemon_reload: true
    - name: reload systemd
      ansible.builtin.systemd:
        daemon_reload: true
    # Named "restart" so existing notify: lines keep working, but performs a
    # reload so in-flight connections are not dropped.
    - name: restart haproxy
      ansible.builtin.systemd:
        name: haproxy
        state: reloaded
    - name: restart oauth2-proxy-prometheus
      ansible.builtin.systemd:
        name: oauth2-proxy-prometheus
        state: restarted

View File

@@ -0,0 +1,36 @@
# Grafana Configuration - Managed by Ansible
# Do not edit manually - changes will be overwritten

[server]
# Public root URL as seen behind the reverse proxy.
root_url = {{ grafana_root_url }}

[auth]
# Disable login form for OAuth users (admins can still use local auth)
disable_login_form = false

# Generic OAuth against Casdoor; `| lower` renders Jinja booleans as
# Grafana-compatible true/false.
[auth.generic_oauth]
enabled = {{ grafana_oauth_enabled }}
name = {{ grafana_oauth_name | default('Casdoor') }}
allow_sign_up = {{ grafana_oauth_allow_sign_up | default(true) | lower }}
client_id = {{ grafana_oauth_client_id }}
client_secret = {{ grafana_oauth_client_secret }}
scopes = {{ grafana_oauth_scopes | default('openid profile email') }}
auth_url = {{ grafana_oauth_auth_url }}
token_url = {{ grafana_oauth_token_url }}
api_url = {{ grafana_oauth_api_url }}
# Map Casdoor user attributes to Grafana
email_attribute_path = email
login_attribute_path = preferred_username
name_attribute_path = name
# Default role for new OAuth users
role_attribute_path = contains(groups[*], 'grafana-admin') && 'Admin' || contains(groups[*], 'grafana-editor') && 'Editor' || 'Viewer'
# TLS settings for internal communication
tls_skip_verify_insecure = {{ grafana_oauth_skip_tls_verify | default(true) | lower }}

[log]
# Console-only logging — systemd journal captures output, Alloy ships to Loki
mode = console
level = {{ grafana_log_level | default('info') }}

[log.console]
format = text

View File

@@ -0,0 +1,62 @@
# OAuth2-Proxy Configuration for Prometheus UI
# Authenticates users via Casdoor OIDC before proxying to Prometheus
# Red Panda Approved

# Provider Configuration (Casdoor OIDC)
provider = "oidc"
provider_display_name = "Casdoor"
oidc_issuer_url = "{{prometheus_oauth2_oidc_issuer_url}}"
client_id = "{{prometheus_oauth2_client_id}}"
client_secret = "{{prometheus_oauth2_client_secret}}"

# Redirect URL after authentication
redirect_url = "https://prometheus.{{pplg_haproxy_domain}}/oauth2/callback"

# Upstream service (Prometheus)
upstreams = [
  "http://127.0.0.1:9090"
]

# Session/Cookie Configuration — 7-day (168h) cookie, refreshed hourly.
cookie_secret = "{{prometheus_oauth2_cookie_secret}}"
cookie_name = "_oauth2_proxy_prometheus"
cookie_secure = true
cookie_httponly = true
cookie_expire = "168h"
cookie_refresh = "1h"
cookie_domains = ".{{pplg_haproxy_domain}}"
session_store_type = "cookie"

# Authentication settings
# Any email domain is accepted; access control is delegated to the IdP.
email_domains = ["*"]
oidc_email_claim = "email"
oidc_groups_claim = "groups"
# NOTE(review): unverified emails are accepted — acceptable only if Casdoor
# is the sole identity source; confirm.
insecure_oidc_allow_unverified_email = true

# Request settings
pass_access_token = false
pass_authorization_header = false
set_authorization_header = false
set_xauthrequest = true

# Logging
request_logging = true
auth_logging = true
standard_logging = true

# Network settings
http_address = "0.0.0.0:{{prometheus_proxy_port}}"
reverse_proxy = true
real_client_ip_header = "X-Forwarded-For"

# Skip authentication for health check endpoints
skip_auth_routes = [
  "^/ping$"
]

# OIDC specific settings
skip_provider_button = true
oidc_extra_audiences = []

# SSL verification
ssl_insecure_skip_verify = false

View File

@@ -0,0 +1,18 @@
[Unit]
Description=OAuth2-Proxy for Prometheus UI
# oauth2-proxy performs OIDC discovery against Casdoor at startup, so it
# needs actual network connectivity (network-online.target), not merely a
# configured network device (network.target). Restart=on-failure below
# still covers a Casdoor that comes up late.
Wants=network-online.target prometheus.service
After=network-online.target prometheus.service

[Service]
Type=simple
ExecStart=/usr/local/bin/oauth2-proxy --config={{prometheus_oauth2_proxy_dir}}/oauth2-proxy.cfg
# Retry if Casdoor/Prometheus are briefly unavailable at start.
Restart=on-failure
RestartSec=5
# Basic sandboxing
NoNewPrivileges=true
PrivateTmp=true
# Log to the journal; Alloy ships the journal to Loki.
StandardOutput=journal
StandardError=journal
SyslogIdentifier=oauth2-proxy-prometheus

[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,27 @@
[Unit]
Description=PgAdmin4 Web Interface (Gunicorn)
After=network.target
Wants=network.target
[Service]
Type=simple
# Runs unprivileged; templated account must own the pgAdmin data/log dirs.
User={{pgadmin_user}}
Group={{pgadmin_group}}
WorkingDirectory=/usr/pgadmin4/web
# Gunicorn bound to loopback only — exposed externally via HAProxy
# (backend_pgadmin). Single worker: pgAdmin keeps per-session state
# in-process, so threads (4) provide the concurrency instead.
# Access/error logs go to stdout/stderr ("-") and land in the journal.
ExecStart=/usr/pgadmin4/venv/bin/python3 -m gunicorn pgAdmin4:app \
--bind 127.0.0.1:{{pgadmin_port}} \
--workers 1 \
--threads 4 \
--timeout 120 \
--access-logfile - \
--error-logfile -
Restart=on-failure
RestartSec=5
# Basic sandboxing
NoNewPrivileges=true
PrivateTmp=true
# Journal output is shipped to Loki by Alloy.
StandardOutput=journal
StandardError=journal
SyslogIdentifier=pgadmin
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,127 @@
# PPLG HAProxy - Internal TLS Termination for Prospero
# Services: Grafana, PgAdmin, Prometheus (via OAuth2-Proxy), Loki, Alertmanager
# Managed by Ansible - Red Panda Approved
global
    log 127.0.0.1:{{pplg_haproxy_syslog_port}} local0
    stats timeout 30s
    # Default SSL material locations
    ca-base /etc/ssl/certs
    crt-base /etc/ssl/private
    # SSL/TLS configuration
    ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384
    ssl-default-bind-ciphersuites TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256
    ssl-default-bind-options ssl-min-ver TLSv1.2 no-tls-tickets
defaults
    log global
    mode http
    option httplog
    option dontlognull
    log-format "%ci:%cp [%tr] %ft %b/%s %TR/%Tw/%Tc/%Tr/%Ta %ST %B %CC %CS %tsc %ac/%fc/%bc/%sc/%rc %sq/%bq %hr %hs %{+Q}r"
    timeout connect 5s
    timeout client 50s
    timeout server 50s
# Stats page with Prometheus metrics
listen stats
    bind *:{{pplg_haproxy_stats_port}}
    mode http
    stats enable
    # FIX: the stats page previously shared /metrics with the
    # prometheus-exporter service below; the http-request rule is evaluated
    # first and always won, leaving the human stats page unreachable.
    # Serve the stats UI on /stats instead; /metrics stays the scrape path
    # (prometheus.yml 'haproxy' job targets :8404/metrics).
    stats uri /stats
    stats refresh 15s
    stats show-legends
    stats show-node
    # Prometheus metrics endpoint
    http-request use-service prometheus-exporter if { path /metrics }
# HTTP frontend - redirect all traffic to HTTPS
frontend http_frontend
    bind *:80
    mode http
    option httplog
    http-request redirect scheme https code 301
# HTTPS frontend with subdomain-based routing
frontend https_frontend
    bind *:443 ssl crt {{pplg_haproxy_cert_path}}
    mode http
    option httplog
    option forwardfor
    # Forward original protocol and host
    http-request set-header X-Forwarded-Proto https
    http-request set-header X-Forwarded-Port %[dst_port]
    http-request set-header X-Forwarded-Host %[req.hdr(Host)]
    # Security headers
    http-response set-header Strict-Transport-Security "max-age=31536000; includeSubDomains"
    http-response set-header X-Frame-Options "SAMEORIGIN"
    http-response set-header X-Content-Type-Options "nosniff"
    http-response set-header X-XSS-Protection "1; mode=block"
    # Subdomain ACLs — exact Host match. FIX: hdr_beg() did prefix matching,
    # so e.g. "grafana.<domain>.anything" also matched. Exact hdr() assumes
    # clients send no explicit :443 port in the Host header (default for
    # browsers and standard HTTP clients on the default port).
    acl host_grafana hdr(host) -i grafana.{{pplg_haproxy_domain}}
    acl host_pgadmin hdr(host) -i pgadmin.{{pplg_haproxy_domain}}
    acl host_prometheus hdr(host) -i prometheus.{{pplg_haproxy_domain}}
    acl host_loki hdr(host) -i loki.{{pplg_haproxy_domain}}
    acl host_alertmanager hdr(host) -i alertmanager.{{pplg_haproxy_domain}}
    # Prometheus write API - bypass OAuth2-Proxy (machine-to-machine)
    acl is_prometheus_write path_beg /api/v1/write
    use_backend backend_grafana if host_grafana
    use_backend backend_pgadmin if host_pgadmin
    use_backend backend_prometheus_direct if host_prometheus is_prometheus_write
    use_backend backend_prometheus if host_prometheus
    use_backend backend_loki if host_loki
    use_backend backend_alertmanager if host_alertmanager
# Grafana - Native Casdoor OAuth SSO
backend backend_grafana
    mode http
    balance roundrobin
    option httpchk GET /api/health
    http-check expect status 200
    server grafana_1 127.0.0.1:3000 check
# PgAdmin - Native Casdoor OAuth SSO
backend backend_pgadmin
    mode http
    balance roundrobin
    option httpchk GET /misc/ping
    http-check expect status 200
    server pgadmin_1 127.0.0.1:{{pgadmin_port}} check
# Prometheus UI - via OAuth2-Proxy sidecar (/ping is unauthenticated there)
backend backend_prometheus
    mode http
    balance roundrobin
    option httpchk GET /ping
    http-check expect status 200
    server prometheus_1 127.0.0.1:{{prometheus_proxy_port}} check
# Prometheus Write API - direct (no auth, machine-to-machine)
backend backend_prometheus_direct
    mode http
    balance roundrobin
    server prometheus_write_1 127.0.0.1:9090 check
# Loki - no auth (machine-to-machine log ingestion)
backend backend_loki
    mode http
    balance roundrobin
    option httpchk GET /ready
    http-check expect status 200
    server loki_1 127.0.0.1:{{loki_port}} check
# Alertmanager - internal only
backend backend_alertmanager
    mode http
    balance roundrobin
    option httpchk GET /-/healthy
    http-check expect status 200
    server alertmanager_1 127.0.0.1:{{alertmanager_port}} check

View File

@@ -0,0 +1,48 @@
---
# Red Panda Approved Prometheus Configuration
# Rendered by Ansible/Jinja2. All templated scalars are quoted so the YAML
# parser can never re-type a rendered value (durations, host:port strings,
# numeric-looking tokens).
global:
  scrape_interval: "{{ prometheus_scrape_interval }}"
  evaluation_interval: "{{ prometheus_evaluation_interval }}"

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - "{{ alertmanager_host }}:{{ alertmanager_port }}"

# Alert definitions live alongside this file (see alert_rules.yml template).
rule_files:
  - "alert_rules.yml"

scrape_configs:
  # Prometheus self-monitoring
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  # Host metrics for every managed node; the Jinja to_json filter emits a
  # JSON array, which is valid YAML flow-sequence syntax.
  - job_name: 'node-exporter'
    static_configs:
      - targets: {{ prometheus_targets | to_json }}

  - job_name: 'alertmanager'
    static_configs:
      - targets: ['{{ alertmanager_host }}:{{ alertmanager_port }}']

  # HAProxy built-in prometheus-exporter on the stats listener
  - job_name: 'haproxy'
    static_configs:
      - targets: ['titania.incus:8404']
    metrics_path: '/metrics'

  # Gitea metrics, protected by a bearer token from Ansible Vault
  - job_name: 'gitea'
    static_configs:
      - targets: ['oberon.incus:22084']
    metrics_path: '/metrics'
    authorization:
      type: Bearer
      credentials: '{{ vault_gitea_metrics_token }}'

  # Casdoor metrics. NOTE(review): credentials are passed as URL query
  # parameters (Casdoor's API contract) and can surface in access logs —
  # confirm this is acceptable on the internal network.
  - job_name: 'casdoor'
    static_configs:
      - targets: ['{{ casdoor_metrics_host }}:{{ casdoor_metrics_port }}']
    metrics_path: '/api/metrics'
    params:
      accessKey: ['{{ casdoor_prometheus_access_key }}']
      accessSecret: ['{{ casdoor_prometheus_access_secret }}']