refactor: remove HAProxy from Prospero, centralize TLS on Titania
Move TLS termination and reverse proxying entirely to Titania's HAProxy, eliminating the redundant HAProxy instance on Prospero. Backends now communicate over plain HTTP within the internal network.

- Remove HAProxy container, config, certs, and syslog from Prospero
- Remove ssl_backend flags from Titania backend definitions
- Replace pplg_haproxy_* vars with single pplg_domain variable
- Remove HAProxy syslog source from Alloy config
- Update OAuth2-Proxy to listen on all interfaces for Titania access
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
// Prospero Alloy Configuration
|
||||
// Red Panda Approved 🐼
|
||||
// Services: PPLG stack (Grafana, Prometheus, Loki, Alertmanager, PgAdmin, HAProxy, OAuth2-Proxy)
|
||||
// Services: PPLG stack (Grafana, Prometheus, Loki, Alertmanager, PgAdmin, OAuth2-Proxy)
|
||||
|
||||
logging {
|
||||
level = "{{alloy_log_level}}"
|
||||
@@ -19,20 +19,6 @@ loki.source.file "system_logs" {
|
||||
forward_to = [loki.write.default.receiver]
|
||||
}
|
||||
|
||||
// PPLG HAProxy syslog receiver (HAProxy syslog → Alloy → Loki)
|
||||
loki.source.syslog "pplg_haproxy" {
|
||||
listener {
|
||||
address = "127.0.0.1:{{pplg_haproxy_syslog_port}}"
|
||||
protocol = "tcp"
|
||||
labels = {
|
||||
job = "pplg-haproxy",
|
||||
hostname = "{{inventory_hostname}}",
|
||||
environment = "{{deployment_environment}}",
|
||||
}
|
||||
}
|
||||
forward_to = [loki.write.default.receiver]
|
||||
}
|
||||
|
||||
// Journal relabeling - assign dedicated job labels per systemd unit
|
||||
loki.relabel "journal" {
|
||||
forward_to = []
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
# Prospero Configuration - PPLG Observability & Admin Stack
|
||||
# Services: pplg (PgAdmin, Prometheus, Loki, Grafana + HAProxy + OAuth2-Proxy)
|
||||
# Services: pplg (PgAdmin, Prometheus, Loki, Grafana + OAuth2-Proxy)
|
||||
|
||||
ansible_user: robert
|
||||
|
||||
@@ -12,17 +12,10 @@ services:
|
||||
alloy_log_level: "warn"
|
||||
|
||||
# ============================================================================
|
||||
# PPLG HAProxy Configuration
|
||||
# PPLG Domain (TLS termination handled by Titania HAProxy)
|
||||
# ============================================================================
|
||||
|
||||
pplg_haproxy_user: haproxy
|
||||
pplg_haproxy_group: haproxy
|
||||
pplg_haproxy_uid: 800
|
||||
pplg_haproxy_gid: 800
|
||||
pplg_haproxy_domain: "ouranos.helu.ca"
|
||||
pplg_haproxy_cert_path: /etc/haproxy/certs/ouranos.pem
|
||||
pplg_haproxy_stats_port: 8404
|
||||
pplg_haproxy_syslog_port: 51405
|
||||
pplg_domain: "ouranos.helu.ca"
|
||||
|
||||
# ============================================================================
|
||||
# Grafana
|
||||
|
||||
@@ -89,31 +89,26 @@ haproxy_backends:
|
||||
backend_host: "prospero.incus"
|
||||
backend_port: 5050
|
||||
health_path: "/misc/ping"
|
||||
ssl_backend: true
|
||||
|
||||
- subdomain: "grafana"
|
||||
backend_host: "prospero.incus"
|
||||
backend_port: 3000
|
||||
health_path: "/api/health"
|
||||
ssl_backend: true
|
||||
|
||||
- subdomain: "prometheus"
|
||||
backend_host: "prospero.incus"
|
||||
backend_port: 9090
|
||||
backend_port: 9091 # OAuth2-Proxy sidecar (skips auth for /api/v1/write and /ping)
|
||||
health_path: "/ping"
|
||||
ssl_backend: true
|
||||
|
||||
- subdomain: "loki"
|
||||
backend_host: "prospero.incus"
|
||||
backend_port: 3100
|
||||
health_path: "/ready"
|
||||
ssl_backend: true
|
||||
|
||||
- subdomain: "alertmanager"
|
||||
backend_host: "prospero.incus"
|
||||
backend_port: 9093
|
||||
health_path: "/-/healthy"
|
||||
ssl_backend: true
|
||||
|
||||
- subdomain: "gitea"
|
||||
backend_host: "rosalind.incus"
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
---
|
||||
# PPLG - Consolidated Observability & Admin Stack for Prospero
|
||||
# PgAdmin, Prometheus, Loki, Grafana + HAProxy (TLS) + OAuth2-Proxy (Prometheus UI)
|
||||
# PgAdmin, Prometheus, Loki, Grafana + OAuth2-Proxy (Prometheus UI)
|
||||
# TLS termination handled by Titania HAProxy
|
||||
# Red Panda Approved
|
||||
|
||||
- name: Deploy PPLG Stack
|
||||
@@ -47,7 +48,6 @@
|
||||
ansible.builtin.apt:
|
||||
name:
|
||||
- acl
|
||||
- haproxy
|
||||
- prometheus
|
||||
- loki
|
||||
- grafana
|
||||
@@ -372,83 +372,6 @@
|
||||
state: started
|
||||
daemon_reload: true
|
||||
|
||||
# ===========================================================================
|
||||
# SSL Certificate Distribution (from Titania)
|
||||
# ===========================================================================
|
||||
|
||||
- name: Create haproxy group
|
||||
ansible.builtin.group:
|
||||
name: "{{pplg_haproxy_group}}"
|
||||
gid: "{{pplg_haproxy_gid}}"
|
||||
system: true
|
||||
|
||||
- name: Create haproxy user
|
||||
ansible.builtin.user:
|
||||
name: "{{pplg_haproxy_user}}"
|
||||
comment: "PPLG HAProxy"
|
||||
group: "{{pplg_haproxy_group}}"
|
||||
uid: "{{pplg_haproxy_uid}}"
|
||||
system: true
|
||||
|
||||
- name: Create HAProxy directories
|
||||
ansible.builtin.file:
|
||||
path: "{{item}}"
|
||||
state: directory
|
||||
owner: "{{pplg_haproxy_user}}"
|
||||
group: "{{pplg_haproxy_group}}"
|
||||
mode: '750'
|
||||
loop:
|
||||
- /etc/haproxy
|
||||
- /etc/haproxy/certs
|
||||
|
||||
- name: Fetch wildcard certificate from Titania
|
||||
ansible.builtin.fetch:
|
||||
src: /etc/haproxy/certs/ouranos.pem
|
||||
dest: /tmp/ouranos-haproxy.pem
|
||||
flat: yes
|
||||
delegate_to: titania.incus
|
||||
when: "'titania.incus' in groups['ubuntu']"
|
||||
|
||||
- name: Deploy wildcard certificate
|
||||
ansible.builtin.copy:
|
||||
src: /tmp/ouranos-haproxy.pem
|
||||
dest: "{{pplg_haproxy_cert_path}}"
|
||||
owner: "{{pplg_haproxy_user}}"
|
||||
group: "{{pplg_haproxy_group}}"
|
||||
mode: '0640'
|
||||
when: "'titania.incus' in groups['ubuntu']"
|
||||
|
||||
- name: Generate self-signed wildcard certificate (fallback)
|
||||
command: >
|
||||
openssl req -x509 -nodes -days 365 -newkey rsa:2048
|
||||
-keyout {{pplg_haproxy_cert_path}}
|
||||
-out {{pplg_haproxy_cert_path}}
|
||||
-subj "/C=US/ST=State/L=City/O=Ouranos/CN=*.{{pplg_haproxy_domain}}"
|
||||
-addext "subjectAltName=DNS:*.{{pplg_haproxy_domain}},DNS:{{pplg_haproxy_domain}}"
|
||||
when: "'titania.incus' not in groups['ubuntu']"
|
||||
args:
|
||||
creates: "{{pplg_haproxy_cert_path}}"
|
||||
|
||||
# ===========================================================================
|
||||
# HAProxy (TLS Termination)
|
||||
# ===========================================================================
|
||||
|
||||
- name: Template HAProxy configuration
|
||||
ansible.builtin.template:
|
||||
src: pplg-haproxy.cfg.j2
|
||||
dest: /etc/haproxy/haproxy.cfg
|
||||
owner: "{{pplg_haproxy_user}}"
|
||||
group: "{{pplg_haproxy_group}}"
|
||||
mode: "640"
|
||||
validate: haproxy -c -f %s
|
||||
notify: restart haproxy
|
||||
|
||||
- name: Enable and start HAProxy service
|
||||
ansible.builtin.systemd:
|
||||
name: haproxy
|
||||
enabled: true
|
||||
state: started
|
||||
|
||||
# ===========================================================================
|
||||
# Handlers
|
||||
# ===========================================================================
|
||||
@@ -484,11 +407,6 @@
|
||||
ansible.builtin.systemd:
|
||||
daemon_reload: true
|
||||
|
||||
- name: restart haproxy
|
||||
ansible.builtin.systemd:
|
||||
name: haproxy
|
||||
state: reloaded
|
||||
|
||||
- name: restart oauth2-proxy-prometheus
|
||||
ansible.builtin.systemd:
|
||||
name: oauth2-proxy-prometheus
|
||||
|
||||
@@ -10,7 +10,7 @@ client_id = "{{prometheus_oauth2_client_id}}"
|
||||
client_secret = "{{prometheus_oauth2_client_secret}}"
|
||||
|
||||
# Redirect URL after authentication
|
||||
redirect_url = "https://prometheus.{{pplg_haproxy_domain}}/oauth2/callback"
|
||||
redirect_url = "https://prometheus.{{pplg_domain}}/oauth2/callback"
|
||||
|
||||
# Upstream service (Prometheus)
|
||||
upstreams = [
|
||||
@@ -24,7 +24,7 @@ cookie_secure = true
|
||||
cookie_httponly = true
|
||||
cookie_expire = "168h"
|
||||
cookie_refresh = "1h"
|
||||
cookie_domains = ".{{pplg_haproxy_domain}}"
|
||||
cookie_domains = ".{{pplg_domain}}"
|
||||
session_store_type = "cookie"
|
||||
|
||||
# Authentication settings
|
||||
@@ -49,9 +49,10 @@ http_address = "0.0.0.0:{{prometheus_proxy_port}}"
|
||||
reverse_proxy = true
|
||||
real_client_ip_header = "X-Forwarded-For"
|
||||
|
||||
# Skip authentication for health check endpoints
|
||||
# Skip authentication for health check and machine-to-machine endpoints
|
||||
skip_auth_routes = [
|
||||
"^/ping$"
|
||||
"^/ping$",
|
||||
"^/api/v1/write$"
|
||||
]
|
||||
|
||||
# OIDC specific settings
|
||||
|
||||
@@ -9,7 +9,7 @@ User={{pgadmin_user}}
|
||||
Group={{pgadmin_group}}
|
||||
WorkingDirectory=/usr/pgadmin4/web
|
||||
ExecStart=/usr/pgadmin4/venv/bin/python3 -m gunicorn pgAdmin4:app \
|
||||
--bind 127.0.0.1:{{pgadmin_port}} \
|
||||
--bind 0.0.0.0:{{pgadmin_port}} \
|
||||
--workers 1 \
|
||||
--threads 4 \
|
||||
--timeout 120 \
|
||||
|
||||
@@ -1,127 +0,0 @@
|
||||
# PPLG HAProxy - Internal TLS Termination for Prospero
|
||||
# Services: Grafana, PgAdmin, Prometheus (via OAuth2-Proxy), Loki, Alertmanager
|
||||
# Managed by Ansible - Red Panda Approved
|
||||
|
||||
global
|
||||
log 127.0.0.1:{{pplg_haproxy_syslog_port}} local0
|
||||
stats timeout 30s
|
||||
|
||||
# Default SSL material locations
|
||||
ca-base /etc/ssl/certs
|
||||
crt-base /etc/ssl/private
|
||||
|
||||
# SSL/TLS configuration
|
||||
ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384
|
||||
ssl-default-bind-ciphersuites TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256
|
||||
ssl-default-bind-options ssl-min-ver TLSv1.2 no-tls-tickets
|
||||
|
||||
defaults
|
||||
log global
|
||||
mode http
|
||||
option httplog
|
||||
option dontlognull
|
||||
log-format "%ci:%cp [%tr] %ft %b/%s %TR/%Tw/%Tc/%Tr/%Ta %ST %B %CC %CS %tsc %ac/%fc/%bc/%sc/%rc %sq/%bq %hr %hs %{+Q}r"
|
||||
timeout connect 5s
|
||||
timeout client 50s
|
||||
timeout server 50s
|
||||
|
||||
# Stats page with Prometheus metrics
|
||||
listen stats
|
||||
bind *:{{pplg_haproxy_stats_port}}
|
||||
mode http
|
||||
stats enable
|
||||
stats uri /metrics
|
||||
stats refresh 15s
|
||||
stats show-legends
|
||||
stats show-node
|
||||
|
||||
# Prometheus metrics endpoint
|
||||
http-request use-service prometheus-exporter if { path /metrics }
|
||||
|
||||
# HTTP frontend - redirect all traffic to HTTPS
|
||||
frontend http_frontend
|
||||
bind *:80
|
||||
mode http
|
||||
option httplog
|
||||
http-request redirect scheme https code 301
|
||||
|
||||
# HTTPS frontend with subdomain-based routing
|
||||
frontend https_frontend
|
||||
bind *:443 ssl crt {{pplg_haproxy_cert_path}}
|
||||
mode http
|
||||
option httplog
|
||||
option forwardfor
|
||||
|
||||
# Forward original protocol and host
|
||||
http-request set-header X-Forwarded-Proto https
|
||||
http-request set-header X-Forwarded-Port %[dst_port]
|
||||
http-request set-header X-Forwarded-Host %[req.hdr(Host)]
|
||||
|
||||
# Security headers
|
||||
http-response set-header Strict-Transport-Security "max-age=31536000; includeSubDomains"
|
||||
http-response set-header X-Frame-Options "SAMEORIGIN"
|
||||
http-response set-header X-Content-Type-Options "nosniff"
|
||||
http-response set-header X-XSS-Protection "1; mode=block"
|
||||
|
||||
# Subdomain ACLs
|
||||
acl host_grafana hdr_beg(host) -i grafana.{{pplg_haproxy_domain}}
|
||||
acl host_pgadmin hdr_beg(host) -i pgadmin.{{pplg_haproxy_domain}}
|
||||
acl host_prometheus hdr_beg(host) -i prometheus.{{pplg_haproxy_domain}}
|
||||
acl host_loki hdr_beg(host) -i loki.{{pplg_haproxy_domain}}
|
||||
acl host_alertmanager hdr_beg(host) -i alertmanager.{{pplg_haproxy_domain}}
|
||||
|
||||
# Prometheus write API - bypass OAuth2-Proxy (machine-to-machine)
|
||||
acl is_prometheus_write path_beg /api/v1/write
|
||||
|
||||
use_backend backend_grafana if host_grafana
|
||||
use_backend backend_pgadmin if host_pgadmin
|
||||
use_backend backend_prometheus_direct if host_prometheus is_prometheus_write
|
||||
use_backend backend_prometheus if host_prometheus
|
||||
use_backend backend_loki if host_loki
|
||||
use_backend backend_alertmanager if host_alertmanager
|
||||
|
||||
# Grafana - Native Casdoor OAuth SSO
|
||||
backend backend_grafana
|
||||
mode http
|
||||
balance roundrobin
|
||||
option httpchk GET /api/health
|
||||
http-check expect status 200
|
||||
server grafana_1 127.0.0.1:3000 check
|
||||
|
||||
# PgAdmin - Native Casdoor OAuth SSO
|
||||
backend backend_pgadmin
|
||||
mode http
|
||||
balance roundrobin
|
||||
option httpchk GET /misc/ping
|
||||
http-check expect status 200
|
||||
server pgadmin_1 127.0.0.1:{{pgadmin_port}} check
|
||||
|
||||
# Prometheus UI - via OAuth2-Proxy sidecar
|
||||
backend backend_prometheus
|
||||
mode http
|
||||
balance roundrobin
|
||||
option httpchk GET /ping
|
||||
http-check expect status 200
|
||||
server prometheus_1 127.0.0.1:{{prometheus_proxy_port}} check
|
||||
|
||||
# Prometheus Write API - direct (no auth, machine-to-machine)
|
||||
backend backend_prometheus_direct
|
||||
mode http
|
||||
balance roundrobin
|
||||
server prometheus_write_1 127.0.0.1:9090 check
|
||||
|
||||
# Loki - no auth (machine-to-machine log ingestion)
|
||||
backend backend_loki
|
||||
mode http
|
||||
balance roundrobin
|
||||
option httpchk GET /ready
|
||||
http-check expect status 200
|
||||
server loki_1 127.0.0.1:{{loki_port}} check
|
||||
|
||||
# Alertmanager - internal only
|
||||
backend backend_alertmanager
|
||||
mode http
|
||||
balance roundrobin
|
||||
option httpchk GET /-/healthy
|
||||
http-check expect status 200
|
||||
server alertmanager_1 127.0.0.1:{{alertmanager_port}} check
|
||||
150
docs/pplg.md
150
docs/pplg.md
@@ -2,12 +2,11 @@
|
||||
|
||||
## Overview
|
||||
|
||||
PPLG is the consolidated observability and administration stack running on **Prospero**. It bundles PgAdmin, Prometheus, Loki, and Grafana behind an internal HAProxy for TLS termination, with Casdoor SSO for user-facing services and OAuth2-Proxy as a sidecar for Prometheus UI authentication.
|
||||
PPLG is the consolidated observability and administration stack running on **Prospero**. It bundles PgAdmin, Prometheus, Loki, and Grafana with Casdoor SSO for user-facing services and OAuth2-Proxy as a sidecar for Prometheus UI authentication. TLS termination is handled by Titania's HAProxy, which routes directly to each service on Prospero.
|
||||
|
||||
**Host:** prospero.incus
|
||||
**Role:** Observability
|
||||
**Incus Ports:** 25510 → 443 (HTTPS), 25511 → 80 (HTTP redirect)
|
||||
**External Access:** Via Titania HAProxy → `prospero.incus:443`
|
||||
**External Access:** Via Titania HAProxy → `prospero.incus` (direct to service ports)
|
||||
|
||||
| Subdomain | Service | Auth Method |
|
||||
|-----------|---------|-------------|
|
||||
@@ -23,33 +22,23 @@ PPLG is the consolidated observability and administration stack running on **Pro
|
||||
┌──────────┐ ┌────────────┐ ┌─────────────────────────────────────────────────┐
|
||||
│ Client │─────▶│ HAProxy │─────▶│ Prospero (PPLG) │
|
||||
│ │ │ (Titania) │ │ │
|
||||
└──────────┘ │ :443 → :443 │ ┌──────────────────────────────────────────┐ │
|
||||
└────────────┘ │ │ HAProxy (systemd, :443/:80) │ │
|
||||
│ │ TLS termination + subdomain routing │ │
|
||||
┌──────────┐ │ └───┬──────┬──────┬──────┬──────┬──────────┘ │
|
||||
│ Alloy │──push──────────────────────────▶│ │ │ │ │
|
||||
│ (agents) │ loki.ouranos.helu.ca │ │ │ │ │ │
|
||||
│ │ prometheus.ouranos.helu.ca │ │ │ │ │
|
||||
└──────────┘ │ ▼ ▼ ▼ ▼ ▼ │
|
||||
│ Grafana PgAdmin OAuth2 Loki Alertmanager │
|
||||
│ :3000 :5050 Proxy :3100 :9093 │
|
||||
│ :9091 │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ Prometheus │
|
||||
│ :9090 │
|
||||
└─────────────────────────────────────────────────┘
|
||||
└──────────┘ │ :443 TLS │ │ Grafana (:3000) — Casdoor OAuth │
|
||||
│ termination│ │ PgAdmin (:5050) — Casdoor OAuth │
|
||||
┌──────────┐ └────────────┘ │ OAuth2-Proxy (:9091) → Prometheus (:9090) │
|
||||
│ Alloy │─────────────────────────▶│ Loki (:3100) — no auth │
|
||||
│ (agents) │ │ Alertmanager (:9093) — no auth │
|
||||
└──────────┘ └─────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Traffic Flow
|
||||
|
||||
| Source | Destination | Path | Auth |
|
||||
|--------|-------------|------|------|
|
||||
| Browser → Grafana | Titania :443 → Prospero :443 → HAProxy → :3000 | Subdomain ACL | Casdoor OAuth |
|
||||
| Browser → PgAdmin | Titania :443 → Prospero :443 → HAProxy → :5050 | Subdomain ACL | Casdoor OAuth |
|
||||
| Browser → Prometheus | Titania :443 → Prospero :443 → HAProxy → OAuth2-Proxy :9091 → :9090 | Subdomain ACL | OAuth2-Proxy → Casdoor |
|
||||
| Alloy → Loki | `https://loki.ouranos.helu.ca` → HAProxy :443 → :3100 | Subdomain ACL | None |
|
||||
| Alloy → Prometheus | `https://prometheus.ouranos.helu.ca/api/v1/write` → HAProxy :443 → :9090 | `skip_auth_route` | None |
|
||||
| Browser → Grafana | Titania :443 → Prospero :3000 | Subdomain ACL | Casdoor OAuth |
|
||||
| Browser → PgAdmin | Titania :443 → Prospero :5050 | Subdomain ACL | Casdoor OAuth |
|
||||
| Browser → Prometheus | Titania :443 → Prospero :9091 (OAuth2-Proxy) → :9090 | Subdomain ACL | OAuth2-Proxy → Casdoor |
|
||||
| Alloy → Loki | Titania :443 → Prospero :3100 | Subdomain ACL | None |
|
||||
| Alloy → Prometheus | Titania :443 → Prospero :9091 → :9090 | `skip_auth_routes` | None |
|
||||
|
||||
## Deployment
|
||||
|
||||
@@ -72,7 +61,6 @@ ansible-playbook pplg/deploy.yml
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `pplg/deploy.yml` | Main consolidated deployment playbook |
|
||||
| `pplg/pplg-haproxy.cfg.j2` | HAProxy TLS termination config (5 backends) |
|
||||
| `pplg/prometheus.yml.j2` | Prometheus scrape configuration |
|
||||
| `pplg/alert_rules.yml.j2` | Prometheus alerting rules |
|
||||
| `pplg/alertmanager.yml.j2` | Alertmanager routing and Pushover notifications |
|
||||
@@ -88,15 +76,13 @@ ansible-playbook pplg/deploy.yml
|
||||
### Deployment Steps
|
||||
|
||||
1. **APT Repositories**: Add Grafana and PgAdmin repos
|
||||
2. **Install Packages**: haproxy, prometheus, loki, grafana, pgadmin4-web, gunicorn
|
||||
2. **Install Packages**: prometheus, loki, grafana, pgadmin4-web
|
||||
3. **Prometheus**: Config, alert rules, systemd override for remote write receiver
|
||||
4. **Alertmanager**: Install, config with Pushover integration
|
||||
5. **Loki**: Create user/dirs, template config
|
||||
6. **Grafana**: Provisioning (datasources, users, dashboards), OAuth config
|
||||
7. **PgAdmin**: Create user/dirs, gunicorn systemd service, Casdoor OAuth config
|
||||
8. **OAuth2-Proxy**: Download binary (v7.6.0), config for Prometheus sidecar
|
||||
9. **SSL Certificate**: Fetch Let's Encrypt wildcard cert from Titania (self-signed fallback)
|
||||
10. **HAProxy**: Template config, enable and start systemd service
|
||||
|
||||
### Deployment Order
|
||||
|
||||
@@ -298,35 +284,18 @@ Register in Casdoor Admin UI (`https://id.ouranos.helu.ca`) or add to `ansible/c
|
||||
| **Loki** | None | Machine-to-machine (Alloy agents push logs) |
|
||||
| **Alertmanager** | None | Internal only |
|
||||
|
||||
## HAProxy Configuration
|
||||
## OAuth2-Proxy skip_auth_routes
|
||||
|
||||
### Backends
|
||||
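The Prometheus write API (`/api/v1/write`) is used by Alloy agents for machine-to-machine metric pushes, and the health check (`/ping`) is the `health_path` Titania's HAProxy probes for the Prometheus backend. Neither caller can complete an interactive login, so OAuth2-Proxy's `skip_auth_routes` config bypasses authentication for these paths: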
|
||||
|
||||
| Backend | Upstream | Health Check | Auth |
|
||||
|---------|----------|-------------|------|
|
||||
| `backend_grafana` | `127.0.0.1:3000` | `GET /api/health` | Grafana OAuth |
|
||||
| `backend_pgadmin` | `127.0.0.1:5050` | `GET /misc/ping` | PgAdmin OAuth |
|
||||
| `backend_prometheus` | `127.0.0.1:9091` (OAuth2-Proxy) | `GET /ping` | OAuth2-Proxy |
|
||||
| `backend_prometheus_direct` | `127.0.0.1:9090` | — | None (write API) |
|
||||
| `backend_loki` | `127.0.0.1:3100` | `GET /ready` | None |
|
||||
| `backend_alertmanager` | `127.0.0.1:9093` | `GET /-/healthy` | None |
|
||||
|
||||
### skip_auth_route Pattern
|
||||
|
||||
The Prometheus write API (`/api/v1/write`) is accessed by Alloy agents for machine-to-machine metric pushes. HAProxy uses an ACL to bypass OAuth2-Proxy:
|
||||
|
||||
```
|
||||
acl is_prometheus_write path_beg /api/v1/write
|
||||
use_backend backend_prometheus_direct if host_prometheus is_prometheus_write
|
||||
```toml
|
||||
skip_auth_routes = [
|
||||
"^/ping$",
|
||||
"^/api/v1/write$"
|
||||
]
|
||||
```
|
||||
|
||||
This routes `https://prometheus.ouranos.helu.ca/api/v1/write` directly to Prometheus on `:9090`, while all other Prometheus traffic goes through OAuth2-Proxy on `:9091`.
|
||||
|
||||
### SSL Certificate
|
||||
|
||||
- **Primary**: Let's Encrypt wildcard cert (`*.ouranos.helu.ca`) fetched from Titania
|
||||
- **Fallback**: Self-signed cert generated on Prospero (if Titania unavailable)
|
||||
- **Path**: `/etc/haproxy/certs/ouranos.pem`
|
||||
This allows `https://prometheus.ouranos.helu.ca/api/v1/write` to reach Prometheus without OAuth, while all other Prometheus traffic requires Casdoor SSO authentication.
|
||||
|
||||
## Host Variables
|
||||
|
||||
@@ -340,7 +309,7 @@ services:
|
||||
```
|
||||
|
||||
Key variable groups defined in `prospero.incus.yml`:
|
||||
- PPLG HAProxy (user, group, uid/gid 800, syslog port)
|
||||
- PPLG domain (`ouranos.helu.ca`)
|
||||
- Grafana (datasources, users, OAuth config)
|
||||
- Prometheus (scrape targets, OAuth2-Proxy sidecar config)
|
||||
- Alertmanager (Pushover integration)
|
||||
@@ -348,56 +317,36 @@ Key variable groups defined in `prospero.incus.yml`:
|
||||
- PgAdmin (user, data/log directories, OAuth config)
|
||||
- Casdoor Metrics (access key/secret for Prometheus scraping)
|
||||
|
||||
## Terraform
|
||||
## Titania Backend Routing
|
||||
|
||||
### Prospero Port Mapping
|
||||
|
||||
```hcl
|
||||
devices = [
|
||||
{
|
||||
name = "https_internal"
|
||||
type = "proxy"
|
||||
properties = {
|
||||
listen = "tcp:0.0.0.0:25510"
|
||||
connect = "tcp:127.0.0.1:443"
|
||||
}
|
||||
},
|
||||
{
|
||||
name = "http_redirect"
|
||||
type = "proxy"
|
||||
properties = {
|
||||
listen = "tcp:0.0.0.0:25511"
|
||||
connect = "tcp:127.0.0.1:80"
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
Run `terraform apply` before deploying if port mappings changed.
|
||||
|
||||
### Titania Backend Routing
|
||||
|
||||
Titania's HAProxy routes external subdomains to Prospero's HTTPS port:
|
||||
Titania's HAProxy routes external subdomains directly to Prospero service ports:
|
||||
|
||||
```yaml
|
||||
# In titania.incus.yml haproxy_backends
|
||||
- subdomain: "grafana"
|
||||
backend_host: "prospero.incus"
|
||||
backend_port: 443
|
||||
backend_port: 3000
|
||||
health_path: "/api/health"
|
||||
ssl_backend: true
|
||||
|
||||
- subdomain: "pgadmin"
|
||||
backend_host: "prospero.incus"
|
||||
backend_port: 443
|
||||
backend_port: 5050
|
||||
health_path: "/misc/ping"
|
||||
ssl_backend: true
|
||||
|
||||
- subdomain: "prometheus"
|
||||
backend_host: "prospero.incus"
|
||||
backend_port: 443
|
||||
backend_port: 9091 # OAuth2-Proxy sidecar
|
||||
health_path: "/ping"
|
||||
ssl_backend: true
|
||||
|
||||
- subdomain: "loki"
|
||||
backend_host: "prospero.incus"
|
||||
backend_port: 3100
|
||||
health_path: "/ready"
|
||||
|
||||
- subdomain: "alertmanager"
|
||||
backend_host: "prospero.incus"
|
||||
backend_port: 9093
|
||||
health_path: "/-/healthy"
|
||||
```
|
||||
|
||||
## Monitoring
|
||||
@@ -406,7 +355,6 @@ Titania's HAProxy routes external subdomains to Prospero's HTTPS port:
|
||||
|
||||
**File:** `ansible/alloy/prospero/config.alloy.j2`
|
||||
|
||||
- **HAProxy Syslog**: `loki.source.syslog` on `127.0.0.1:51405` (TCP) receives Docker syslog from HAProxy container
|
||||
- **Journal Labels**: Dedicated job labels for `grafana-server`, `prometheus`, `loki`, `alertmanager`, `pgadmin`, `oauth2-proxy-prometheus`
|
||||
- **System Logs**: `/var/log/syslog`, `/var/log/auth.log` → Loki
|
||||
- **Metrics**: Node exporter + process exporter → Prometheus remote write
|
||||
@@ -477,22 +425,11 @@ ssh prospero.incus
|
||||
sudo systemctl status prometheus grafana-server loki prometheus-alertmanager pgadmin oauth2-proxy-prometheus
|
||||
```
|
||||
|
||||
### HAProxy Service
|
||||
|
||||
```bash
|
||||
ssh prospero.incus
|
||||
sudo systemctl status haproxy
|
||||
sudo journalctl -u haproxy -f
|
||||
```
|
||||
|
||||
### View Logs
|
||||
|
||||
```bash
|
||||
# All PPLG services via journal
|
||||
sudo journalctl -u prometheus -u grafana-server -u loki -u prometheus-alertmanager -u pgadmin -u oauth2-proxy-prometheus -f
|
||||
|
||||
# HAProxy logs (shipped via syslog to Alloy → Loki)
|
||||
# Query in Grafana: {job="pplg-haproxy"}
|
||||
```
|
||||
|
||||
### Test Endpoints (from Prospero)
|
||||
@@ -512,18 +449,17 @@ curl -s http://127.0.0.1:3100/ready
|
||||
|
||||
# Alertmanager
|
||||
curl -s http://127.0.0.1:9093/-/healthy
|
||||
|
||||
# HAProxy stats
|
||||
curl -s http://127.0.0.1:8404/metrics | head
|
||||
```
|
||||
|
||||
### Test TLS (from any host)
|
||||
### Test External Access (from any host)
|
||||
|
||||
```bash
|
||||
# Direct to Prospero container
|
||||
curl -sk https://prospero.incus/api/health
|
||||
# Via Titania HAProxy
|
||||
curl -s https://grafana.ouranos.helu.ca/api/health
|
||||
curl -s https://pgadmin.ouranos.helu.ca/misc/ping
|
||||
curl -s https://prometheus.ouranos.helu.ca/ping
|
||||
curl -s https://loki.ouranos.helu.ca/ready
|
||||
curl -s https://alertmanager.ouranos.helu.ca/-/healthy
|
||||
```
|
||||
|
||||
### Common Errors
|
||||
|
||||
Reference in New Issue
Block a user