From 0f21380fd0e822f1ed37cdc21446b16db1257e74 Mon Sep 17 00:00:00 2001 From: Robert Helewka Date: Wed, 8 Apr 2026 17:57:09 +0000 Subject: [PATCH] refactor: remove HAProxy from Prospero, centralize TLS on Titania Move TLS termination and reverse proxying entirely to Titania's HAProxy, eliminating the redundant HAProxy instance on Prospero. Backends now communicate over plain HTTP within the internal network. - Remove HAProxy container, config, certs, and syslog from Prospero - Remove ssl_backend flags from Titania backend definitions - Replace pplg_haproxy_* vars with single pplg_domain variable - Remove HAProxy syslog source from Alloy config - Bind PgAdmin to all interfaces and add /api/v1/write to OAuth2-Proxy skip_auth_routes for Titania access --- ansible/alloy/prospero/config.alloy.j2 | 16 +- .../inventory/host_vars/prospero.incus.yml | 13 +- ansible/inventory/host_vars/titania.incus.yml | 7 +- ansible/pplg/deploy.yml | 86 +--------- ansible/pplg/oauth2-proxy-prometheus.cfg.j2 | 9 +- ansible/pplg/pgadmin.service.j2 | 2 +- ansible/pplg/pplg-haproxy.cfg.j2 | 127 --------------- docs/pplg.md | 150 +++++------------- 8 files changed, 56 insertions(+), 354 deletions(-) delete mode 100644 ansible/pplg/pplg-haproxy.cfg.j2 diff --git a/ansible/alloy/prospero/config.alloy.j2 b/ansible/alloy/prospero/config.alloy.j2 index b0c09b4..0f23c93 100644 --- a/ansible/alloy/prospero/config.alloy.j2 +++ b/ansible/alloy/prospero/config.alloy.j2 @@ -1,6 +1,6 @@ // Prospero Alloy Configuration // Red Panda Approved 🐼 -// Services: PPLG stack (Grafana, Prometheus, Loki, Alertmanager, PgAdmin, HAProxy, OAuth2-Proxy) +// Services: PPLG stack (Grafana, Prometheus, Loki, Alertmanager, PgAdmin, OAuth2-Proxy) logging { level = "{{alloy_log_level}}" @@ -19,20 +19,6 @@ loki.source.file "system_logs" { forward_to = [loki.write.default.receiver] } -// PPLG HAProxy syslog receiver (HAProxy syslog → Alloy → Loki) -loki.source.syslog "pplg_haproxy" { - listener { - address = "127.0.0.1:{{pplg_haproxy_syslog_port}}" - protocol = 
"tcp" - labels = { - job = "pplg-haproxy", - hostname = "{{inventory_hostname}}", - environment = "{{deployment_environment}}", - } - } - forward_to = [loki.write.default.receiver] -} - // Journal relabeling - assign dedicated job labels per systemd unit loki.relabel "journal" { forward_to = [] diff --git a/ansible/inventory/host_vars/prospero.incus.yml b/ansible/inventory/host_vars/prospero.incus.yml index 2745a42..37c7738 100644 --- a/ansible/inventory/host_vars/prospero.incus.yml +++ b/ansible/inventory/host_vars/prospero.incus.yml @@ -1,6 +1,6 @@ --- # Prospero Configuration - PPLG Observability & Admin Stack -# Services: pplg (PgAdmin, Prometheus, Loki, Grafana + HAProxy + OAuth2-Proxy) +# Services: pplg (PgAdmin, Prometheus, Loki, Grafana + OAuth2-Proxy) ansible_user: robert @@ -12,17 +12,10 @@ services: alloy_log_level: "warn" # ============================================================================ -# PPLG HAProxy Configuration +# PPLG Domain (TLS termination handled by Titania HAProxy) # ============================================================================ -pplg_haproxy_user: haproxy -pplg_haproxy_group: haproxy -pplg_haproxy_uid: 800 -pplg_haproxy_gid: 800 -pplg_haproxy_domain: "ouranos.helu.ca" -pplg_haproxy_cert_path: /etc/haproxy/certs/ouranos.pem -pplg_haproxy_stats_port: 8404 -pplg_haproxy_syslog_port: 51405 +pplg_domain: "ouranos.helu.ca" # ============================================================================ # Grafana diff --git a/ansible/inventory/host_vars/titania.incus.yml b/ansible/inventory/host_vars/titania.incus.yml index 3d2a835..cc71120 100644 --- a/ansible/inventory/host_vars/titania.incus.yml +++ b/ansible/inventory/host_vars/titania.incus.yml @@ -89,31 +89,26 @@ haproxy_backends: backend_host: "prospero.incus" backend_port: 5050 health_path: "/misc/ping" - ssl_backend: true - subdomain: "grafana" backend_host: "prospero.incus" backend_port: 3000 health_path: "/api/health" - ssl_backend: true - subdomain: "prometheus" 
backend_host: "prospero.incus" - backend_port: 9090 + backend_port: 9091 # OAuth2-Proxy sidecar (skips auth for /api/v1/write and /ping) health_path: "/ping" - ssl_backend: true - subdomain: "loki" backend_host: "prospero.incus" backend_port: 3100 health_path: "/ready" - ssl_backend: true - subdomain: "alertmanager" backend_host: "prospero.incus" backend_port: 9093 health_path: "/-/healthy" - ssl_backend: true - subdomain: "gitea" backend_host: "rosalind.incus" diff --git a/ansible/pplg/deploy.yml b/ansible/pplg/deploy.yml index 17f84e0..b820c5f 100644 --- a/ansible/pplg/deploy.yml +++ b/ansible/pplg/deploy.yml @@ -1,6 +1,7 @@ --- # PPLG - Consolidated Observability & Admin Stack for Prospero -# PgAdmin, Prometheus, Loki, Grafana + HAProxy (TLS) + OAuth2-Proxy (Prometheus UI) +# PgAdmin, Prometheus, Loki, Grafana + OAuth2-Proxy (Prometheus UI) +# TLS termination handled by Titania HAProxy # Red Panda Approved - name: Deploy PPLG Stack @@ -47,7 +48,6 @@ ansible.builtin.apt: name: - acl - - haproxy - prometheus - loki - grafana @@ -372,83 +372,6 @@ state: started daemon_reload: true - # =========================================================================== - # SSL Certificate Distribution (from Titania) - # =========================================================================== - - - name: Create haproxy group - ansible.builtin.group: - name: "{{pplg_haproxy_group}}" - gid: "{{pplg_haproxy_gid}}" - system: true - - - name: Create haproxy user - ansible.builtin.user: - name: "{{pplg_haproxy_user}}" - comment: "PPLG HAProxy" - group: "{{pplg_haproxy_group}}" - uid: "{{pplg_haproxy_uid}}" - system: true - - - name: Create HAProxy directories - ansible.builtin.file: - path: "{{item}}" - state: directory - owner: "{{pplg_haproxy_user}}" - group: "{{pplg_haproxy_group}}" - mode: '750' - loop: - - /etc/haproxy - - /etc/haproxy/certs - - - name: Fetch wildcard certificate from Titania - ansible.builtin.fetch: - src: /etc/haproxy/certs/ouranos.pem - dest: 
/tmp/ouranos-haproxy.pem - flat: yes - delegate_to: titania.incus - when: "'titania.incus' in groups['ubuntu']" - - - name: Deploy wildcard certificate - ansible.builtin.copy: - src: /tmp/ouranos-haproxy.pem - dest: "{{pplg_haproxy_cert_path}}" - owner: "{{pplg_haproxy_user}}" - group: "{{pplg_haproxy_group}}" - mode: '0640' - when: "'titania.incus' in groups['ubuntu']" - - - name: Generate self-signed wildcard certificate (fallback) - command: > - openssl req -x509 -nodes -days 365 -newkey rsa:2048 - -keyout {{pplg_haproxy_cert_path}} - -out {{pplg_haproxy_cert_path}} - -subj "/C=US/ST=State/L=City/O=Ouranos/CN=*.{{pplg_haproxy_domain}}" - -addext "subjectAltName=DNS:*.{{pplg_haproxy_domain}},DNS:{{pplg_haproxy_domain}}" - when: "'titania.incus' not in groups['ubuntu']" - args: - creates: "{{pplg_haproxy_cert_path}}" - - # =========================================================================== - # HAProxy (TLS Termination) - # =========================================================================== - - - name: Template HAProxy configuration - ansible.builtin.template: - src: pplg-haproxy.cfg.j2 - dest: /etc/haproxy/haproxy.cfg - owner: "{{pplg_haproxy_user}}" - group: "{{pplg_haproxy_group}}" - mode: "640" - validate: haproxy -c -f %s - notify: restart haproxy - - - name: Enable and start HAProxy service - ansible.builtin.systemd: - name: haproxy - enabled: true - state: started - # =========================================================================== # Handlers # =========================================================================== @@ -484,11 +407,6 @@ ansible.builtin.systemd: daemon_reload: true - - name: restart haproxy - ansible.builtin.systemd: - name: haproxy - state: reloaded - - name: restart oauth2-proxy-prometheus ansible.builtin.systemd: name: oauth2-proxy-prometheus diff --git a/ansible/pplg/oauth2-proxy-prometheus.cfg.j2 b/ansible/pplg/oauth2-proxy-prometheus.cfg.j2 index caa894b..4081d24 100644 --- 
a/ansible/pplg/oauth2-proxy-prometheus.cfg.j2 +++ b/ansible/pplg/oauth2-proxy-prometheus.cfg.j2 @@ -10,7 +10,7 @@ client_id = "{{prometheus_oauth2_client_id}}" client_secret = "{{prometheus_oauth2_client_secret}}" # Redirect URL after authentication -redirect_url = "https://prometheus.{{pplg_haproxy_domain}}/oauth2/callback" +redirect_url = "https://prometheus.{{pplg_domain}}/oauth2/callback" # Upstream service (Prometheus) upstreams = [ @@ -24,7 +24,7 @@ cookie_secure = true cookie_httponly = true cookie_expire = "168h" cookie_refresh = "1h" -cookie_domains = ".{{pplg_haproxy_domain}}" +cookie_domains = ".{{pplg_domain}}" session_store_type = "cookie" # Authentication settings @@ -49,9 +49,10 @@ http_address = "0.0.0.0:{{prometheus_proxy_port}}" reverse_proxy = true real_client_ip_header = "X-Forwarded-For" -# Skip authentication for health check endpoints +# Skip authentication for health check and machine-to-machine endpoints skip_auth_routes = [ - "^/ping$" + "^/ping$", + "^/api/v1/write$" ] # OIDC specific settings diff --git a/ansible/pplg/pgadmin.service.j2 b/ansible/pplg/pgadmin.service.j2 index 7d299f5..c582690 100644 --- a/ansible/pplg/pgadmin.service.j2 +++ b/ansible/pplg/pgadmin.service.j2 @@ -9,7 +9,7 @@ User={{pgadmin_user}} Group={{pgadmin_group}} WorkingDirectory=/usr/pgadmin4/web ExecStart=/usr/pgadmin4/venv/bin/python3 -m gunicorn pgAdmin4:app \ - --bind 127.0.0.1:{{pgadmin_port}} \ + --bind 0.0.0.0:{{pgadmin_port}} \ --workers 1 \ --threads 4 \ --timeout 120 \ diff --git a/ansible/pplg/pplg-haproxy.cfg.j2 b/ansible/pplg/pplg-haproxy.cfg.j2 deleted file mode 100644 index 4fb4a7f..0000000 --- a/ansible/pplg/pplg-haproxy.cfg.j2 +++ /dev/null @@ -1,127 +0,0 @@ -# PPLG HAProxy - Internal TLS Termination for Prospero -# Services: Grafana, PgAdmin, Prometheus (via OAuth2-Proxy), Loki, Alertmanager -# Managed by Ansible - Red Panda Approved - -global - log 127.0.0.1:{{pplg_haproxy_syslog_port}} local0 - stats timeout 30s - - # Default SSL material 
locations - ca-base /etc/ssl/certs - crt-base /etc/ssl/private - - # SSL/TLS configuration - ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384 - ssl-default-bind-ciphersuites TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256 - ssl-default-bind-options ssl-min-ver TLSv1.2 no-tls-tickets - -defaults - log global - mode http - option httplog - option dontlognull - log-format "%ci:%cp [%tr] %ft %b/%s %TR/%Tw/%Tc/%Tr/%Ta %ST %B %CC %CS %tsc %ac/%fc/%bc/%sc/%rc %sq/%bq %hr %hs %{+Q}r" - timeout connect 5s - timeout client 50s - timeout server 50s - -# Stats page with Prometheus metrics -listen stats - bind *:{{pplg_haproxy_stats_port}} - mode http - stats enable - stats uri /metrics - stats refresh 15s - stats show-legends - stats show-node - - # Prometheus metrics endpoint - http-request use-service prometheus-exporter if { path /metrics } - -# HTTP frontend - redirect all traffic to HTTPS -frontend http_frontend - bind *:80 - mode http - option httplog - http-request redirect scheme https code 301 - -# HTTPS frontend with subdomain-based routing -frontend https_frontend - bind *:443 ssl crt {{pplg_haproxy_cert_path}} - mode http - option httplog - option forwardfor - - # Forward original protocol and host - http-request set-header X-Forwarded-Proto https - http-request set-header X-Forwarded-Port %[dst_port] - http-request set-header X-Forwarded-Host %[req.hdr(Host)] - - # Security headers - http-response set-header Strict-Transport-Security "max-age=31536000; includeSubDomains" - http-response set-header X-Frame-Options "SAMEORIGIN" - http-response set-header X-Content-Type-Options "nosniff" - http-response set-header X-XSS-Protection "1; mode=block" - - # Subdomain ACLs - acl host_grafana hdr_beg(host) -i grafana.{{pplg_haproxy_domain}} - acl host_pgadmin hdr_beg(host) -i pgadmin.{{pplg_haproxy_domain}} - acl host_prometheus hdr_beg(host) -i 
prometheus.{{pplg_haproxy_domain}} - acl host_loki hdr_beg(host) -i loki.{{pplg_haproxy_domain}} - acl host_alertmanager hdr_beg(host) -i alertmanager.{{pplg_haproxy_domain}} - - # Prometheus write API - bypass OAuth2-Proxy (machine-to-machine) - acl is_prometheus_write path_beg /api/v1/write - - use_backend backend_grafana if host_grafana - use_backend backend_pgadmin if host_pgadmin - use_backend backend_prometheus_direct if host_prometheus is_prometheus_write - use_backend backend_prometheus if host_prometheus - use_backend backend_loki if host_loki - use_backend backend_alertmanager if host_alertmanager - -# Grafana - Native Casdoor OAuth SSO -backend backend_grafana - mode http - balance roundrobin - option httpchk GET /api/health - http-check expect status 200 - server grafana_1 127.0.0.1:3000 check - -# PgAdmin - Native Casdoor OAuth SSO -backend backend_pgadmin - mode http - balance roundrobin - option httpchk GET /misc/ping - http-check expect status 200 - server pgadmin_1 127.0.0.1:{{pgadmin_port}} check - -# Prometheus UI - via OAuth2-Proxy sidecar -backend backend_prometheus - mode http - balance roundrobin - option httpchk GET /ping - http-check expect status 200 - server prometheus_1 127.0.0.1:{{prometheus_proxy_port}} check - -# Prometheus Write API - direct (no auth, machine-to-machine) -backend backend_prometheus_direct - mode http - balance roundrobin - server prometheus_write_1 127.0.0.1:9090 check - -# Loki - no auth (machine-to-machine log ingestion) -backend backend_loki - mode http - balance roundrobin - option httpchk GET /ready - http-check expect status 200 - server loki_1 127.0.0.1:{{loki_port}} check - -# Alertmanager - internal only -backend backend_alertmanager - mode http - balance roundrobin - option httpchk GET /-/healthy - http-check expect status 200 - server alertmanager_1 127.0.0.1:{{alertmanager_port}} check diff --git a/docs/pplg.md b/docs/pplg.md index b6bb5c5..7a22a80 100644 --- a/docs/pplg.md +++ b/docs/pplg.md @@ -2,12 
+2,11 @@ ## Overview -PPLG is the consolidated observability and administration stack running on **Prospero**. It bundles PgAdmin, Prometheus, Loki, and Grafana behind an internal HAProxy for TLS termination, with Casdoor SSO for user-facing services and OAuth2-Proxy as a sidecar for Prometheus UI authentication. +PPLG is the consolidated observability and administration stack running on **Prospero**. It bundles PgAdmin, Prometheus, Loki, and Grafana with Casdoor SSO for user-facing services and OAuth2-Proxy as a sidecar for Prometheus UI authentication. TLS termination is handled by Titania's HAProxy, which routes directly to each service on Prospero. **Host:** prospero.incus **Role:** Observability -**Incus Ports:** 25510 → 443 (HTTPS), 25511 → 80 (HTTP redirect) -**External Access:** Via Titania HAProxy → `prospero.incus:443` +**External Access:** Via Titania HAProxy → `prospero.incus` (direct to service ports) | Subdomain | Service | Auth Method | |-----------|---------|-------------| @@ -23,33 +22,23 @@ PPLG is the consolidated observability and administration stack running on **Pro ┌──────────┐ ┌────────────┐ ┌─────────────────────────────────────────────────┐ │ Client │─────▶│ HAProxy │─────▶│ Prospero (PPLG) │ │ │ │ (Titania) │ │ │ -└──────────┘ │ :443 → :443 │ ┌──────────────────────────────────────────┐ │ - └────────────┘ │ │ HAProxy (systemd, :443/:80) │ │ - │ │ TLS termination + subdomain routing │ │ -┌──────────┐ │ └───┬──────┬──────┬──────┬──────┬──────────┘ │ -│ Alloy 
│──push──────────────────────────▶│ │ │ │ │ -│ (agents) │ loki.ouranos.helu.ca │ │ │ │ │ │ -│ │ prometheus.ouranos.helu.ca │ │ │ │ │ -└──────────┘ │ ▼ ▼ ▼ ▼ ▼ │ - │ Grafana PgAdmin OAuth2 Loki Alertmanager │ - │ :3000 :5050 Proxy :3100 :9093 │ - │ :9091 │ - │ │ │ - │ ▼ │ - │ Prometheus │ - │ :9090 │ - └─────────────────────────────────────────────────┘ +└──────────┘ │ :443 TLS │ │ Grafana (:3000) — Casdoor OAuth │ + │ termination│ │ PgAdmin (:5050) — Casdoor OAuth │ +┌──────────┐ └────────────┘ │ OAuth2-Proxy (:9091) → Prometheus (:9090) │ +│ Alloy │─────────────────────────▶│ Loki (:3100) — no auth │ +│ (agents) │ │ Alertmanager (:9093) — no auth │ +└──────────┘ └─────────────────────────────────────────────────┘ ``` ### Traffic Flow | Source | Destination | Path | Auth | |--------|-------------|------|------| -| Browser → Grafana | Titania :443 → Prospero :443 → HAProxy → :3000 | Subdomain ACL | Casdoor OAuth | -| Browser → PgAdmin | Titania :443 → Prospero :443 → HAProxy → :5050 | Subdomain ACL | Casdoor OAuth | -| Browser → Prometheus | Titania :443 → Prospero :443 → HAProxy → OAuth2-Proxy :9091 → :9090 | Subdomain ACL | OAuth2-Proxy → Casdoor | -| Alloy → Loki | `https://loki.ouranos.helu.ca` → HAProxy :443 → :3100 | Subdomain ACL | None | -| Alloy → Prometheus | `https://prometheus.ouranos.helu.ca/api/v1/write` → HAProxy :443 → :9090 | `skip_auth_route` | None | +| Browser → Grafana | Titania :443 → Prospero :3000 | Subdomain ACL | Casdoor OAuth | +| Browser → PgAdmin 
| Titania :443 → Prospero :5050 | Subdomain ACL | Casdoor OAuth | +| Browser → Prometheus | Titania :443 → Prospero :9091 (OAuth2-Proxy) → :9090 | Subdomain ACL | OAuth2-Proxy → Casdoor | +| Alloy → Loki | Titania :443 → Prospero :3100 | Subdomain ACL | None | +| Alloy → Prometheus | Titania :443 → Prospero :9091 → :9090 | `skip_auth_routes` | None | ## Deployment @@ -72,7 +61,6 @@ ansible-playbook pplg/deploy.yml | File | Purpose | |------|---------| | `pplg/deploy.yml` | Main consolidated deployment playbook | -| `pplg/pplg-haproxy.cfg.j2` | HAProxy TLS termination config (5 backends) | | `pplg/prometheus.yml.j2` | Prometheus scrape configuration | | `pplg/alert_rules.yml.j2` | Prometheus alerting rules | | `pplg/alertmanager.yml.j2` | Alertmanager routing and Pushover notifications | @@ -88,15 +76,13 @@ ansible-playbook pplg/deploy.yml ### Deployment Steps 1. **APT Repositories**: Add Grafana and PgAdmin repos -2. **Install Packages**: haproxy, prometheus, loki, grafana, pgadmin4-web, gunicorn +2. **Install Packages**: prometheus, loki, grafana, pgadmin4-web 3. **Prometheus**: Config, alert rules, systemd override for remote write receiver 4. **Alertmanager**: Install, config with Pushover integration 5. **Loki**: Create user/dirs, template config 6. **Grafana**: Provisioning (datasources, users, dashboards), OAuth config 7. **PgAdmin**: Create user/dirs, gunicorn systemd service, Casdoor OAuth config 8. **OAuth2-Proxy**: Download binary (v7.6.0), config for Prometheus sidecar -9. **SSL Certificate**: Fetch Let's Encrypt wildcard cert from Titania (self-signed fallback) -10. 
**HAProxy**: Template config, enable and start systemd service ### Deployment Order @@ -298,35 +284,18 @@ Register in Casdoor Admin UI (`https://id.ouranos.helu.ca`) or add to `ansible/c | **Loki** | None | Machine-to-machine (Alloy agents push logs) | | **Alertmanager** | None | Internal only | -## HAProxy Configuration +## OAuth2-Proxy skip_auth_routes -### Backends +The Prometheus write API (`/api/v1/write`) is accessed by Alloy agents for machine-to-machine metric pushes, and the health check (`/ping`) is polled by Titania's HAProxy. OAuth2-Proxy's `skip_auth_routes` config bypasses authentication for these paths: -| Backend | Upstream | Health Check | Auth | -|---------|----------|-------------|------| -| `backend_grafana` | `127.0.0.1:3000` | `GET /api/health` | Grafana OAuth | -| `backend_pgadmin` | `127.0.0.1:5050` | `GET /misc/ping` | PgAdmin OAuth | -| `backend_prometheus` | `127.0.0.1:9091` (OAuth2-Proxy) | `GET /ping` | OAuth2-Proxy | -| `backend_prometheus_direct` | `127.0.0.1:9090` | — | None (write API) | -| `backend_loki` | `127.0.0.1:3100` | `GET /ready` | None | -| `backend_alertmanager` | `127.0.0.1:9093` | `GET /-/healthy` | None | - -### skip_auth_route Pattern - -The Prometheus write API (`/api/v1/write`) is accessed by Alloy agents for machine-to-machine metric pushes. HAProxy uses an ACL to bypass OAuth2-Proxy: - -``` -acl is_prometheus_write path_beg /api/v1/write -use_backend backend_prometheus_direct if host_prometheus is_prometheus_write +```toml +skip_auth_routes = [ + "^/ping$", + "^/api/v1/write$" +] ``` -This routes `https://prometheus.ouranos.helu.ca/api/v1/write` directly to Prometheus on `:9090`, while all other Prometheus traffic goes through OAuth2-Proxy on `:9091`. 
- -### SSL Certificate - -- **Primary**: Let's Encrypt wildcard cert (`*.ouranos.helu.ca`) fetched from Titania -- **Fallback**: Self-signed cert generated on Prospero (if Titania unavailable) -- **Path**: `/etc/haproxy/certs/ouranos.pem` +This allows `https://prometheus.ouranos.helu.ca/api/v1/write` to reach Prometheus without OAuth, while all other Prometheus traffic requires Casdoor SSO authentication. ## Host Variables @@ -340,7 +309,7 @@ services: ``` Key variable groups defined in `prospero.incus.yml`: -- PPLG HAProxy (user, group, uid/gid 800, syslog port) +- PPLG domain (`ouranos.helu.ca`) - Grafana (datasources, users, OAuth config) - Prometheus (scrape targets, OAuth2-Proxy sidecar config) - Alertmanager (Pushover integration) @@ -348,56 +317,36 @@ Key variable groups defined in `prospero.incus.yml`: - PgAdmin (user, data/log directories, OAuth config) - Casdoor Metrics (access key/secret for Prometheus scraping) -## Terraform +## Titania Backend Routing -### Prospero Port Mapping - -```hcl -devices = [ - { - name = "https_internal" - type = "proxy" - properties = { - listen = "tcp:0.0.0.0:25510" - connect = "tcp:127.0.0.1:443" - } - }, - { - name = "http_redirect" - type = "proxy" - properties = { - listen = "tcp:0.0.0.0:25511" - connect = "tcp:127.0.0.1:80" - } - } -] -``` - -Run `terraform apply` before deploying if port mappings changed. 
- -### Titania Backend Routing - -Titania's HAProxy routes external subdomains to Prospero's HTTPS port: +Titania's HAProxy routes external subdomains directly to Prospero service ports: ```yaml # In titania.incus.yml haproxy_backends - subdomain: "grafana" backend_host: "prospero.incus" - backend_port: 443 + backend_port: 3000 health_path: "/api/health" - ssl_backend: true - subdomain: "pgadmin" backend_host: "prospero.incus" - backend_port: 443 + backend_port: 5050 health_path: "/misc/ping" - ssl_backend: true - subdomain: "prometheus" backend_host: "prospero.incus" - backend_port: 443 + backend_port: 9091 # OAuth2-Proxy sidecar health_path: "/ping" - ssl_backend: true + +- subdomain: "loki" + backend_host: "prospero.incus" + backend_port: 3100 + health_path: "/ready" + +- subdomain: "alertmanager" + backend_host: "prospero.incus" + backend_port: 9093 + health_path: "/-/healthy" ``` ## Monitoring @@ -406,7 +355,6 @@ Titania's HAProxy routes external subdomains to Prospero's HTTPS port: **File:** `ansible/alloy/prospero/config.alloy.j2` -- **HAProxy Syslog**: `loki.source.syslog` on `127.0.0.1:51405` (TCP) receives Docker syslog from HAProxy container - **Journal Labels**: Dedicated job labels for `grafana-server`, `prometheus`, `loki`, `alertmanager`, `pgadmin`, `oauth2-proxy-prometheus` - **System Logs**: `/var/log/syslog`, `/var/log/auth.log` → Loki - **Metrics**: Node exporter + process exporter → Prometheus remote write @@ -477,22 +425,11 @@ ssh prospero.incus sudo systemctl status prometheus grafana-server loki prometheus-alertmanager pgadmin oauth2-proxy-prometheus ``` -### HAProxy Service - -```bash -ssh prospero.incus -sudo systemctl status haproxy -sudo journalctl -u haproxy -f -``` - ### View Logs ```bash # All PPLG services via journal sudo journalctl -u prometheus -u grafana-server -u loki -u prometheus-alertmanager -u pgadmin -u oauth2-proxy-prometheus -f - -# HAProxy logs (shipped via syslog to Alloy → Loki) -# Query in Grafana: 
{job="pplg-haproxy"} ``` ### Test Endpoints (from Prospero) @@ -512,18 +449,17 @@ curl -s http://127.0.0.1:3100/ready # Alertmanager curl -s http://127.0.0.1:9093/-/healthy - -# HAProxy stats -curl -s http://127.0.0.1:8404/metrics | head ``` -### Test TLS (from any host) +### Test External Access (from any host) ```bash -# Direct to Prospero container -curl -sk https://prospero.incus/api/health # Via Titania HAProxy curl -s https://grafana.ouranos.helu.ca/api/health +curl -s https://pgadmin.ouranos.helu.ca/misc/ping +curl -s https://prometheus.ouranos.helu.ca/ping +curl -s https://loki.ouranos.helu.ca/ready +curl -s https://alertmanager.ouranos.helu.ca/-/healthy ``` ### Common Errors