refactor: remove HAProxy from Prospero, centralize TLS on Titania

Move TLS termination and reverse proxying entirely to Titania's
HAProxy, eliminating the redundant HAProxy instance on Prospero.
Backends now communicate over plain HTTP within the internal network.

- Remove HAProxy package, systemd service, config, certs, and syslog from Prospero
- Remove ssl_backend flags from Titania backend definitions
- Replace pplg_haproxy_* vars with single pplg_domain variable
- Remove HAProxy syslog source from Alloy config
- Bind PgAdmin (gunicorn) to all interfaces for Titania access
- Add /api/v1/write to OAuth2-Proxy skip_auth_routes (replaces the HAProxy write-API bypass)
This commit is contained in:
2026-04-08 17:57:09 +00:00
parent df1ee5e778
commit 0f21380fd0
8 changed files with 56 additions and 354 deletions

View File

@@ -1,6 +1,6 @@
// Prospero Alloy Configuration // Prospero Alloy Configuration
// Red Panda Approved 🐼 // Red Panda Approved 🐼
// Services: PPLG stack (Grafana, Prometheus, Loki, Alertmanager, PgAdmin, HAProxy, OAuth2-Proxy) // Services: PPLG stack (Grafana, Prometheus, Loki, Alertmanager, PgAdmin, OAuth2-Proxy)
logging { logging {
level = "{{alloy_log_level}}" level = "{{alloy_log_level}}"
@@ -19,20 +19,6 @@ loki.source.file "system_logs" {
forward_to = [loki.write.default.receiver] forward_to = [loki.write.default.receiver]
} }
// PPLG HAProxy syslog receiver (HAProxy syslog → Alloy → Loki)
loki.source.syslog "pplg_haproxy" {
listener {
address = "127.0.0.1:{{pplg_haproxy_syslog_port}}"
protocol = "tcp"
labels = {
job = "pplg-haproxy",
hostname = "{{inventory_hostname}}",
environment = "{{deployment_environment}}",
}
}
forward_to = [loki.write.default.receiver]
}
// Journal relabeling - assign dedicated job labels per systemd unit // Journal relabeling - assign dedicated job labels per systemd unit
loki.relabel "journal" { loki.relabel "journal" {
forward_to = [] forward_to = []

View File

@@ -1,6 +1,6 @@
--- ---
# Prospero Configuration - PPLG Observability & Admin Stack # Prospero Configuration - PPLG Observability & Admin Stack
# Services: pplg (PgAdmin, Prometheus, Loki, Grafana + HAProxy + OAuth2-Proxy) # Services: pplg (PgAdmin, Prometheus, Loki, Grafana + OAuth2-Proxy)
ansible_user: robert ansible_user: robert
@@ -12,17 +12,10 @@ services:
alloy_log_level: "warn" alloy_log_level: "warn"
# ============================================================================ # ============================================================================
# PPLG HAProxy Configuration # PPLG Domain (TLS termination handled by Titania HAProxy)
# ============================================================================ # ============================================================================
pplg_haproxy_user: haproxy pplg_domain: "ouranos.helu.ca"
pplg_haproxy_group: haproxy
pplg_haproxy_uid: 800
pplg_haproxy_gid: 800
pplg_haproxy_domain: "ouranos.helu.ca"
pplg_haproxy_cert_path: /etc/haproxy/certs/ouranos.pem
pplg_haproxy_stats_port: 8404
pplg_haproxy_syslog_port: 51405
# ============================================================================ # ============================================================================
# Grafana # Grafana

View File

@@ -89,31 +89,26 @@ haproxy_backends:
backend_host: "prospero.incus" backend_host: "prospero.incus"
backend_port: 5050 backend_port: 5050
health_path: "/misc/ping" health_path: "/misc/ping"
ssl_backend: true
- subdomain: "grafana" - subdomain: "grafana"
backend_host: "prospero.incus" backend_host: "prospero.incus"
backend_port: 3000 backend_port: 3000
health_path: "/api/health" health_path: "/api/health"
ssl_backend: true
- subdomain: "prometheus" - subdomain: "prometheus"
backend_host: "prospero.incus" backend_host: "prospero.incus"
backend_port: 9090 backend_port: 9091 # OAuth2-Proxy sidecar (skips auth for /api/v1/write and /ping)
health_path: "/ping" health_path: "/ping"
ssl_backend: true
- subdomain: "loki" - subdomain: "loki"
backend_host: "prospero.incus" backend_host: "prospero.incus"
backend_port: 3100 backend_port: 3100
health_path: "/ready" health_path: "/ready"
ssl_backend: true
- subdomain: "alertmanager" - subdomain: "alertmanager"
backend_host: "prospero.incus" backend_host: "prospero.incus"
backend_port: 9093 backend_port: 9093
health_path: "/-/healthy" health_path: "/-/healthy"
ssl_backend: true
- subdomain: "gitea" - subdomain: "gitea"
backend_host: "rosalind.incus" backend_host: "rosalind.incus"

View File

@@ -1,6 +1,7 @@
--- ---
# PPLG - Consolidated Observability & Admin Stack for Prospero # PPLG - Consolidated Observability & Admin Stack for Prospero
# PgAdmin, Prometheus, Loki, Grafana + HAProxy (TLS) + OAuth2-Proxy (Prometheus UI) # PgAdmin, Prometheus, Loki, Grafana + OAuth2-Proxy (Prometheus UI)
# TLS termination handled by Titania HAProxy
# Red Panda Approved # Red Panda Approved
- name: Deploy PPLG Stack - name: Deploy PPLG Stack
@@ -47,7 +48,6 @@
ansible.builtin.apt: ansible.builtin.apt:
name: name:
- acl - acl
- haproxy
- prometheus - prometheus
- loki - loki
- grafana - grafana
@@ -372,83 +372,6 @@
state: started state: started
daemon_reload: true daemon_reload: true
# ===========================================================================
# SSL Certificate Distribution (from Titania)
# ===========================================================================
- name: Create haproxy group
ansible.builtin.group:
name: "{{pplg_haproxy_group}}"
gid: "{{pplg_haproxy_gid}}"
system: true
- name: Create haproxy user
ansible.builtin.user:
name: "{{pplg_haproxy_user}}"
comment: "PPLG HAProxy"
group: "{{pplg_haproxy_group}}"
uid: "{{pplg_haproxy_uid}}"
system: true
- name: Create HAProxy directories
ansible.builtin.file:
path: "{{item}}"
state: directory
owner: "{{pplg_haproxy_user}}"
group: "{{pplg_haproxy_group}}"
mode: '750'
loop:
- /etc/haproxy
- /etc/haproxy/certs
- name: Fetch wildcard certificate from Titania
ansible.builtin.fetch:
src: /etc/haproxy/certs/ouranos.pem
dest: /tmp/ouranos-haproxy.pem
flat: yes
delegate_to: titania.incus
when: "'titania.incus' in groups['ubuntu']"
- name: Deploy wildcard certificate
ansible.builtin.copy:
src: /tmp/ouranos-haproxy.pem
dest: "{{pplg_haproxy_cert_path}}"
owner: "{{pplg_haproxy_user}}"
group: "{{pplg_haproxy_group}}"
mode: '0640'
when: "'titania.incus' in groups['ubuntu']"
- name: Generate self-signed wildcard certificate (fallback)
command: >
openssl req -x509 -nodes -days 365 -newkey rsa:2048
-keyout {{pplg_haproxy_cert_path}}
-out {{pplg_haproxy_cert_path}}
-subj "/C=US/ST=State/L=City/O=Ouranos/CN=*.{{pplg_haproxy_domain}}"
-addext "subjectAltName=DNS:*.{{pplg_haproxy_domain}},DNS:{{pplg_haproxy_domain}}"
when: "'titania.incus' not in groups['ubuntu']"
args:
creates: "{{pplg_haproxy_cert_path}}"
# ===========================================================================
# HAProxy (TLS Termination)
# ===========================================================================
- name: Template HAProxy configuration
ansible.builtin.template:
src: pplg-haproxy.cfg.j2
dest: /etc/haproxy/haproxy.cfg
owner: "{{pplg_haproxy_user}}"
group: "{{pplg_haproxy_group}}"
mode: "640"
validate: haproxy -c -f %s
notify: restart haproxy
- name: Enable and start HAProxy service
ansible.builtin.systemd:
name: haproxy
enabled: true
state: started
# =========================================================================== # ===========================================================================
# Handlers # Handlers
# =========================================================================== # ===========================================================================
@@ -484,11 +407,6 @@
ansible.builtin.systemd: ansible.builtin.systemd:
daemon_reload: true daemon_reload: true
- name: restart haproxy
ansible.builtin.systemd:
name: haproxy
state: reloaded
- name: restart oauth2-proxy-prometheus - name: restart oauth2-proxy-prometheus
ansible.builtin.systemd: ansible.builtin.systemd:
name: oauth2-proxy-prometheus name: oauth2-proxy-prometheus

View File

@@ -10,7 +10,7 @@ client_id = "{{prometheus_oauth2_client_id}}"
client_secret = "{{prometheus_oauth2_client_secret}}" client_secret = "{{prometheus_oauth2_client_secret}}"
# Redirect URL after authentication # Redirect URL after authentication
redirect_url = "https://prometheus.{{pplg_haproxy_domain}}/oauth2/callback" redirect_url = "https://prometheus.{{pplg_domain}}/oauth2/callback"
# Upstream service (Prometheus) # Upstream service (Prometheus)
upstreams = [ upstreams = [
@@ -24,7 +24,7 @@ cookie_secure = true
cookie_httponly = true cookie_httponly = true
cookie_expire = "168h" cookie_expire = "168h"
cookie_refresh = "1h" cookie_refresh = "1h"
cookie_domains = ".{{pplg_haproxy_domain}}" cookie_domains = ".{{pplg_domain}}"
session_store_type = "cookie" session_store_type = "cookie"
# Authentication settings # Authentication settings
@@ -49,9 +49,10 @@ http_address = "0.0.0.0:{{prometheus_proxy_port}}"
reverse_proxy = true reverse_proxy = true
real_client_ip_header = "X-Forwarded-For" real_client_ip_header = "X-Forwarded-For"
# Skip authentication for health check endpoints # Skip authentication for health check and machine-to-machine endpoints
skip_auth_routes = [ skip_auth_routes = [
"^/ping$" "^/ping$",
"^/api/v1/write$"
] ]
# OIDC specific settings # OIDC specific settings

View File

@@ -9,7 +9,7 @@ User={{pgadmin_user}}
Group={{pgadmin_group}} Group={{pgadmin_group}}
WorkingDirectory=/usr/pgadmin4/web WorkingDirectory=/usr/pgadmin4/web
ExecStart=/usr/pgadmin4/venv/bin/python3 -m gunicorn pgAdmin4:app \ ExecStart=/usr/pgadmin4/venv/bin/python3 -m gunicorn pgAdmin4:app \
--bind 127.0.0.1:{{pgadmin_port}} \ --bind 0.0.0.0:{{pgadmin_port}} \
--workers 1 \ --workers 1 \
--threads 4 \ --threads 4 \
--timeout 120 \ --timeout 120 \

View File

@@ -1,127 +0,0 @@
# PPLG HAProxy - Internal TLS Termination for Prospero
# Services: Grafana, PgAdmin, Prometheus (via OAuth2-Proxy), Loki, Alertmanager
# Managed by Ansible - Red Panda Approved
global
log 127.0.0.1:{{pplg_haproxy_syslog_port}} local0
stats timeout 30s
# Default SSL material locations
ca-base /etc/ssl/certs
crt-base /etc/ssl/private
# SSL/TLS configuration
ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384
ssl-default-bind-ciphersuites TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256
ssl-default-bind-options ssl-min-ver TLSv1.2 no-tls-tickets
defaults
log global
mode http
option httplog
option dontlognull
log-format "%ci:%cp [%tr] %ft %b/%s %TR/%Tw/%Tc/%Tr/%Ta %ST %B %CC %CS %tsc %ac/%fc/%bc/%sc/%rc %sq/%bq %hr %hs %{+Q}r"
timeout connect 5s
timeout client 50s
timeout server 50s
# Stats page with Prometheus metrics
listen stats
bind *:{{pplg_haproxy_stats_port}}
mode http
stats enable
stats uri /metrics
stats refresh 15s
stats show-legends
stats show-node
# Prometheus metrics endpoint
http-request use-service prometheus-exporter if { path /metrics }
# HTTP frontend - redirect all traffic to HTTPS
frontend http_frontend
bind *:80
mode http
option httplog
http-request redirect scheme https code 301
# HTTPS frontend with subdomain-based routing
frontend https_frontend
bind *:443 ssl crt {{pplg_haproxy_cert_path}}
mode http
option httplog
option forwardfor
# Forward original protocol and host
http-request set-header X-Forwarded-Proto https
http-request set-header X-Forwarded-Port %[dst_port]
http-request set-header X-Forwarded-Host %[req.hdr(Host)]
# Security headers
http-response set-header Strict-Transport-Security "max-age=31536000; includeSubDomains"
http-response set-header X-Frame-Options "SAMEORIGIN"
http-response set-header X-Content-Type-Options "nosniff"
http-response set-header X-XSS-Protection "1; mode=block"
# Subdomain ACLs
acl host_grafana hdr_beg(host) -i grafana.{{pplg_haproxy_domain}}
acl host_pgadmin hdr_beg(host) -i pgadmin.{{pplg_haproxy_domain}}
acl host_prometheus hdr_beg(host) -i prometheus.{{pplg_haproxy_domain}}
acl host_loki hdr_beg(host) -i loki.{{pplg_haproxy_domain}}
acl host_alertmanager hdr_beg(host) -i alertmanager.{{pplg_haproxy_domain}}
# Prometheus write API - bypass OAuth2-Proxy (machine-to-machine)
acl is_prometheus_write path_beg /api/v1/write
use_backend backend_grafana if host_grafana
use_backend backend_pgadmin if host_pgadmin
use_backend backend_prometheus_direct if host_prometheus is_prometheus_write
use_backend backend_prometheus if host_prometheus
use_backend backend_loki if host_loki
use_backend backend_alertmanager if host_alertmanager
# Grafana - Native Casdoor OAuth SSO
backend backend_grafana
mode http
balance roundrobin
option httpchk GET /api/health
http-check expect status 200
server grafana_1 127.0.0.1:3000 check
# PgAdmin - Native Casdoor OAuth SSO
backend backend_pgadmin
mode http
balance roundrobin
option httpchk GET /misc/ping
http-check expect status 200
server pgadmin_1 127.0.0.1:{{pgadmin_port}} check
# Prometheus UI - via OAuth2-Proxy sidecar
backend backend_prometheus
mode http
balance roundrobin
option httpchk GET /ping
http-check expect status 200
server prometheus_1 127.0.0.1:{{prometheus_proxy_port}} check
# Prometheus Write API - direct (no auth, machine-to-machine)
backend backend_prometheus_direct
mode http
balance roundrobin
server prometheus_write_1 127.0.0.1:9090 check
# Loki - no auth (machine-to-machine log ingestion)
backend backend_loki
mode http
balance roundrobin
option httpchk GET /ready
http-check expect status 200
server loki_1 127.0.0.1:{{loki_port}} check
# Alertmanager - internal only
backend backend_alertmanager
mode http
balance roundrobin
option httpchk GET /-/healthy
http-check expect status 200
server alertmanager_1 127.0.0.1:{{alertmanager_port}} check

View File

@@ -2,12 +2,11 @@
## Overview ## Overview
PPLG is the consolidated observability and administration stack running on **Prospero**. It bundles PgAdmin, Prometheus, Loki, and Grafana behind an internal HAProxy for TLS termination, with Casdoor SSO for user-facing services and OAuth2-Proxy as a sidecar for Prometheus UI authentication. PPLG is the consolidated observability and administration stack running on **Prospero**. It bundles PgAdmin, Prometheus, Loki, and Grafana with Casdoor SSO for user-facing services and OAuth2-Proxy as a sidecar for Prometheus UI authentication. TLS termination is handled by Titania's HAProxy, which routes directly to each service on Prospero.
**Host:** prospero.incus **Host:** prospero.incus
**Role:** Observability **Role:** Observability
**Incus Ports:** 25510 → 443 (HTTPS), 25511 → 80 (HTTP redirect) **External Access:** Via Titania HAProxy → `prospero.incus` (direct to service ports)
**External Access:** Via Titania HAProxy → `prospero.incus:443`
| Subdomain | Service | Auth Method | | Subdomain | Service | Auth Method |
|-----------|---------|-------------| |-----------|---------|-------------|
@@ -23,33 +22,23 @@ PPLG is the consolidated observability and administration stack running on **Pro
┌──────────┐ ┌────────────┐ ┌─────────────────────────────────────────────────┐ ┌──────────┐ ┌────────────┐ ┌─────────────────────────────────────────────────┐
│ Client │─────▶│ HAProxy │─────▶│ Prospero (PPLG) │ │ Client │─────▶│ HAProxy │─────▶│ Prospero (PPLG) │
│ │ │ (Titania) │ │ │ │ │ │ (Titania) │ │ │
└──────────┘ │ :443 → :443 │ ┌──────────────────────────────────────────┐ └──────────┘ │ :443 TLS │ │ Grafana (:3000) — Casdoor OAuth
└────────────┘ HAProxy (systemd, :443/:80) │ termination│PgAdmin (:5050) — Casdoor OAuth
│ │ TLS termination + subdomain routing ┌──────────┐ └────────────┘ │ OAuth2-Proxy (:9091) → Prometheus (:9090)
┌──────────┐ └───┬──────┬──────┬──────┬──────┬──────────┘ │ Alloy │─────────────────────────▶│ Loki (:3100) — no auth
Alloy │──push──────────────────────────▶│ │ │ │ (agents) │ │ Alertmanager (:9093) — no auth
│ (agents) │ loki.ouranos.helu.ca │ │ │ └──────────┘ └─────────────────────────────────────────────────┘
│ │ prometheus.ouranos.helu.ca │ │ │ │ │
└──────────┘ │ ▼ ▼ ▼ ▼ ▼ │
│ Grafana PgAdmin OAuth2 Loki Alertmanager │
│ :3000 :5050 Proxy :3100 :9093 │
│ :9091 │
│ │ │
│ ▼ │
│ Prometheus │
│ :9090 │
└─────────────────────────────────────────────────┘
``` ```
### Traffic Flow ### Traffic Flow
| Source | Destination | Path | Auth | | Source | Destination | Path | Auth |
|--------|-------------|------|------| |--------|-------------|------|------|
| Browser → Grafana | Titania :443 → Prospero :443 → HAProxy → :3000 | Subdomain ACL | Casdoor OAuth | | Browser → Grafana | Titania :443 → Prospero :3000 | Subdomain ACL | Casdoor OAuth |
| Browser → PgAdmin | Titania :443 → Prospero :443 → HAProxy → :5050 | Subdomain ACL | Casdoor OAuth | | Browser → PgAdmin | Titania :443 → Prospero :5050 | Subdomain ACL | Casdoor OAuth |
| Browser → Prometheus | Titania :443 → Prospero :443 → HAProxy → OAuth2-Proxy :9091 → :9090 | Subdomain ACL | OAuth2-Proxy → Casdoor | | Browser → Prometheus | Titania :443 → Prospero :9091 (OAuth2-Proxy) → :9090 | Subdomain ACL | OAuth2-Proxy → Casdoor |
| Alloy → Loki | `https://loki.ouranos.helu.ca` → HAProxy :443 → :3100 | Subdomain ACL | None | | Alloy → Loki | Titania :443 → Prospero :3100 | Subdomain ACL | None |
| Alloy → Prometheus | `https://prometheus.ouranos.helu.ca/api/v1/write` → HAProxy :443 → :9090 | `skip_auth_route` | None | | Alloy → Prometheus | Titania :443 → Prospero :9091 → :9090 | `skip_auth_routes` | None |
## Deployment ## Deployment
@@ -72,7 +61,6 @@ ansible-playbook pplg/deploy.yml
| File | Purpose | | File | Purpose |
|------|---------| |------|---------|
| `pplg/deploy.yml` | Main consolidated deployment playbook | | `pplg/deploy.yml` | Main consolidated deployment playbook |
| `pplg/pplg-haproxy.cfg.j2` | HAProxy TLS termination config (5 backends) |
| `pplg/prometheus.yml.j2` | Prometheus scrape configuration | | `pplg/prometheus.yml.j2` | Prometheus scrape configuration |
| `pplg/alert_rules.yml.j2` | Prometheus alerting rules | | `pplg/alert_rules.yml.j2` | Prometheus alerting rules |
| `pplg/alertmanager.yml.j2` | Alertmanager routing and Pushover notifications | | `pplg/alertmanager.yml.j2` | Alertmanager routing and Pushover notifications |
@@ -88,15 +76,13 @@ ansible-playbook pplg/deploy.yml
### Deployment Steps ### Deployment Steps
1. **APT Repositories**: Add Grafana and PgAdmin repos 1. **APT Repositories**: Add Grafana and PgAdmin repos
2. **Install Packages**: haproxy, prometheus, loki, grafana, pgadmin4-web, gunicorn 2. **Install Packages**: prometheus, loki, grafana, pgadmin4-web
3. **Prometheus**: Config, alert rules, systemd override for remote write receiver 3. **Prometheus**: Config, alert rules, systemd override for remote write receiver
4. **Alertmanager**: Install, config with Pushover integration 4. **Alertmanager**: Install, config with Pushover integration
5. **Loki**: Create user/dirs, template config 5. **Loki**: Create user/dirs, template config
6. **Grafana**: Provisioning (datasources, users, dashboards), OAuth config 6. **Grafana**: Provisioning (datasources, users, dashboards), OAuth config
7. **PgAdmin**: Create user/dirs, gunicorn systemd service, Casdoor OAuth config 7. **PgAdmin**: Create user/dirs, gunicorn systemd service, Casdoor OAuth config
8. **OAuth2-Proxy**: Download binary (v7.6.0), config for Prometheus sidecar 8. **OAuth2-Proxy**: Download binary (v7.6.0), config for Prometheus sidecar
9. **SSL Certificate**: Fetch Let's Encrypt wildcard cert from Titania (self-signed fallback)
10. **HAProxy**: Template config, enable and start systemd service
### Deployment Order ### Deployment Order
@@ -298,35 +284,18 @@ Register in Casdoor Admin UI (`https://id.ouranos.helu.ca`) or add to `ansible/c
| **Loki** | None | Machine-to-machine (Alloy agents push logs) | | **Loki** | None | Machine-to-machine (Alloy agents push logs) |
| **Alertmanager** | None | Internal only | | **Alertmanager** | None | Internal only |
## HAProxy Configuration ## OAuth2-Proxy skip_auth_routes
### Backends The Prometheus write API (`/api/v1/write`) is used by Alloy agents for machine-to-machine metric pushes, and the health check endpoint (`/ping`) is polled by Titania's HAProxy. OAuth2-Proxy's `skip_auth_routes` config bypasses authentication for these paths:
| Backend | Upstream | Health Check | Auth | ```toml
|---------|----------|-------------|------| skip_auth_routes = [
| `backend_grafana` | `127.0.0.1:3000` | `GET /api/health` | Grafana OAuth | "^/ping$",
| `backend_pgadmin` | `127.0.0.1:5050` | `GET /misc/ping` | PgAdmin OAuth | "^/api/v1/write$"
| `backend_prometheus` | `127.0.0.1:9091` (OAuth2-Proxy) | `GET /ping` | OAuth2-Proxy | ]
| `backend_prometheus_direct` | `127.0.0.1:9090` | — | None (write API) |
| `backend_loki` | `127.0.0.1:3100` | `GET /ready` | None |
| `backend_alertmanager` | `127.0.0.1:9093` | `GET /-/healthy` | None |
### skip_auth_route Pattern
The Prometheus write API (`/api/v1/write`) is accessed by Alloy agents for machine-to-machine metric pushes. HAProxy uses an ACL to bypass OAuth2-Proxy:
```
acl is_prometheus_write path_beg /api/v1/write
use_backend backend_prometheus_direct if host_prometheus is_prometheus_write
``` ```
This routes `https://prometheus.ouranos.helu.ca/api/v1/write` directly to Prometheus on `:9090`, while all other Prometheus traffic goes through OAuth2-Proxy on `:9091`. This allows `https://prometheus.ouranos.helu.ca/api/v1/write` to reach Prometheus without OAuth, while all other Prometheus traffic requires Casdoor SSO authentication.
### SSL Certificate
- **Primary**: Let's Encrypt wildcard cert (`*.ouranos.helu.ca`) fetched from Titania
- **Fallback**: Self-signed cert generated on Prospero (if Titania unavailable)
- **Path**: `/etc/haproxy/certs/ouranos.pem`
## Host Variables ## Host Variables
@@ -340,7 +309,7 @@ services:
``` ```
Key variable groups defined in `prospero.incus.yml`: Key variable groups defined in `prospero.incus.yml`:
- PPLG HAProxy (user, group, uid/gid 800, syslog port) - PPLG domain (`ouranos.helu.ca`)
- Grafana (datasources, users, OAuth config) - Grafana (datasources, users, OAuth config)
- Prometheus (scrape targets, OAuth2-Proxy sidecar config) - Prometheus (scrape targets, OAuth2-Proxy sidecar config)
- Alertmanager (Pushover integration) - Alertmanager (Pushover integration)
@@ -348,56 +317,36 @@ Key variable groups defined in `prospero.incus.yml`:
- PgAdmin (user, data/log directories, OAuth config) - PgAdmin (user, data/log directories, OAuth config)
- Casdoor Metrics (access key/secret for Prometheus scraping) - Casdoor Metrics (access key/secret for Prometheus scraping)
## Terraform ## Titania Backend Routing
### Prospero Port Mapping Titania's HAProxy routes external subdomains directly to Prospero service ports:
```hcl
devices = [
{
name = "https_internal"
type = "proxy"
properties = {
listen = "tcp:0.0.0.0:25510"
connect = "tcp:127.0.0.1:443"
}
},
{
name = "http_redirect"
type = "proxy"
properties = {
listen = "tcp:0.0.0.0:25511"
connect = "tcp:127.0.0.1:80"
}
}
]
```
Run `terraform apply` before deploying if port mappings changed.
### Titania Backend Routing
Titania's HAProxy routes external subdomains to Prospero's HTTPS port:
```yaml ```yaml
# In titania.incus.yml haproxy_backends # In titania.incus.yml haproxy_backends
- subdomain: "grafana" - subdomain: "grafana"
backend_host: "prospero.incus" backend_host: "prospero.incus"
backend_port: 443 backend_port: 3000
health_path: "/api/health" health_path: "/api/health"
ssl_backend: true
- subdomain: "pgadmin" - subdomain: "pgadmin"
backend_host: "prospero.incus" backend_host: "prospero.incus"
backend_port: 443 backend_port: 5050
health_path: "/misc/ping" health_path: "/misc/ping"
ssl_backend: true
- subdomain: "prometheus" - subdomain: "prometheus"
backend_host: "prospero.incus" backend_host: "prospero.incus"
backend_port: 443 backend_port: 9091 # OAuth2-Proxy sidecar
health_path: "/ping" health_path: "/ping"
ssl_backend: true
- subdomain: "loki"
backend_host: "prospero.incus"
backend_port: 3100
health_path: "/ready"
- subdomain: "alertmanager"
backend_host: "prospero.incus"
backend_port: 9093
health_path: "/-/healthy"
``` ```
## Monitoring ## Monitoring
@@ -406,7 +355,6 @@ Titania's HAProxy routes external subdomains to Prospero's HTTPS port:
**File:** `ansible/alloy/prospero/config.alloy.j2` **File:** `ansible/alloy/prospero/config.alloy.j2`
- **HAProxy Syslog**: `loki.source.syslog` on `127.0.0.1:51405` (TCP) receives Docker syslog from HAProxy container
- **Journal Labels**: Dedicated job labels for `grafana-server`, `prometheus`, `loki`, `alertmanager`, `pgadmin`, `oauth2-proxy-prometheus` - **Journal Labels**: Dedicated job labels for `grafana-server`, `prometheus`, `loki`, `alertmanager`, `pgadmin`, `oauth2-proxy-prometheus`
- **System Logs**: `/var/log/syslog`, `/var/log/auth.log` → Loki - **System Logs**: `/var/log/syslog`, `/var/log/auth.log` → Loki
- **Metrics**: Node exporter + process exporter → Prometheus remote write - **Metrics**: Node exporter + process exporter → Prometheus remote write
@@ -477,22 +425,11 @@ ssh prospero.incus
sudo systemctl status prometheus grafana-server loki prometheus-alertmanager pgadmin oauth2-proxy-prometheus sudo systemctl status prometheus grafana-server loki prometheus-alertmanager pgadmin oauth2-proxy-prometheus
``` ```
### HAProxy Service
```bash
ssh prospero.incus
sudo systemctl status haproxy
sudo journalctl -u haproxy -f
```
### View Logs ### View Logs
```bash ```bash
# All PPLG services via journal # All PPLG services via journal
sudo journalctl -u prometheus -u grafana-server -u loki -u prometheus-alertmanager -u pgadmin -u oauth2-proxy-prometheus -f sudo journalctl -u prometheus -u grafana-server -u loki -u prometheus-alertmanager -u pgadmin -u oauth2-proxy-prometheus -f
# HAProxy logs (shipped via syslog to Alloy → Loki)
# Query in Grafana: {job="pplg-haproxy"}
``` ```
### Test Endpoints (from Prospero) ### Test Endpoints (from Prospero)
@@ -512,18 +449,17 @@ curl -s http://127.0.0.1:3100/ready
# Alertmanager # Alertmanager
curl -s http://127.0.0.1:9093/-/healthy curl -s http://127.0.0.1:9093/-/healthy
# HAProxy stats
curl -s http://127.0.0.1:8404/metrics | head
``` ```
### Test TLS (from any host) ### Test External Access (from any host)
```bash ```bash
# Direct to Prospero container
curl -sk https://prospero.incus/api/health
# Via Titania HAProxy # Via Titania HAProxy
curl -s https://grafana.ouranos.helu.ca/api/health curl -s https://grafana.ouranos.helu.ca/api/health
curl -s https://pgadmin.ouranos.helu.ca/misc/ping
curl -s https://prometheus.ouranos.helu.ca/ping
curl -s https://loki.ouranos.helu.ca/ready
curl -s https://alertmanager.ouranos.helu.ca/-/healthy
``` ```
### Common Errors ### Common Errors