diff --git a/ansible/certbot/cert-distribute.yml b/ansible/certbot/cert-distribute.yml
new file mode 100644
index 0000000..a9e878f
--- /dev/null
+++ b/ansible/certbot/cert-distribute.yml
@@ -0,0 +1,92 @@
+---
+# -----------------------------------------------------------------------------
+# Certificate Distribution Playbook
+# -----------------------------------------------------------------------------
+# Pulls certificates from OCI Vault (uploaded by bootes certbot) and
+# deploys them directly to target hosts for HAProxy/service TLS termination.
+#
+# Each target host defines its certificates in host_vars:
+#   certbot_distributed_certs:
+#     - cert_name: corvus.helu.ca
+#       cert_path: /etc/haproxy/certs/corvus.helu.ca.pem
+#
+# Run from fornax:
+#   ansible-playbook certbot/cert-distribute.yml
+#
+# Deployed as a weekly cron job on fornax.
+# Can also be run manually after ad-hoc certificate renewals.
+# -----------------------------------------------------------------------------
+
+- name: Distribute certificates from OCI Vault to target hosts
+  hosts: ubuntu:debian
+  gather_facts: false
+
+  handlers:
+    - name: reload haproxy
+      become: true
+      ansible.builtin.systemd:
+        name: haproxy
+        state: reloaded
+      when: "'haproxy' in services | default([])"
+
+  tasks:
+    # An undefined list and an empty list both mean there is nothing to deploy.
+    - name: Skip hosts without distributed certificates
+      ansible.builtin.meta: end_host
+      when: certbot_distributed_certs | default([]) | length == 0
+
+    # Certificates may live in different directories, so create every distinct
+    # parent directory rather than only the one used by the first list entry.
+    - name: Ensure cert directory exists
+      become: true
+      ansible.builtin.file:
+        path: "{{ item }}"
+        state: directory
+        owner: root
+        group: root
+        mode: '0755'
+      loop: "{{ certbot_distributed_certs | map(attribute='cert_path') | map('dirname') | unique | list }}"
+
+    # Writes the full chain followed by the private key into a single PEM file.
+    - name: Deploy certificate from OCI Vault
+      become: true
+      ansible.builtin.copy:
+        content: |
+          {{ lookup('oci_secret', item.cert_name | replace('.', '-') + '-fullchain', vault_id=oci_vault_id) }}
+          {{ lookup('oci_secret', item.cert_name | replace('.', '-') + '-privkey', vault_id=oci_vault_id) }}
+        dest: "{{ item.cert_path }}"
+        owner: root
+        group: root
+        mode: '0640'
+      loop: "{{ certbot_distributed_certs }}"
+      loop_control:
+        label: "{{ item.cert_name }}"
+      no_log: true
+      notify: reload haproxy
+
+    - name: Verify deployed certificates are valid PEM
+      become: true
+      ansible.builtin.command:
+        cmd: openssl x509 -noout -checkend 0 -in {{ item.cert_path | quote }}
+      register: _cert_check
+      loop: "{{ certbot_distributed_certs }}"
+      loop_control:
+        label: "{{ item.cert_name }}"
+      changed_when: false
+
+    - name: Show certificate expiry dates
+      become: true
+      ansible.builtin.command:
+        cmd: openssl x509 -noout -subject -enddate -in {{ item.cert_path | quote }}
+      register: _cert_info
+      loop: "{{ certbot_distributed_certs }}"
+      loop_control:
+        label: "{{ item.cert_name }}"
+      changed_when: false
+
+    - name: Log certificate status
+      ansible.builtin.debug:
+        msg: "{{ item.item.cert_name }}: {{ item.stdout }}"
+      loop: "{{ _cert_info.results }}"
+      loop_control:
+        label: "{{ item.item.cert_name }}"
diff --git a/ansible/certbot/cert-metrics.sh.j2 b/ansible/certbot/cert-metrics.sh.j2 index 8667f7e..410ce64 100644 --- a/ansible/certbot/cert-metrics.sh.j2 +++ b/ansible/certbot/cert-metrics.sh.j2 @@ -68,4 +68,4 @@ fi # Set permissions and atomic move chmod 644 "${TEMP_FILE}" chown prometheus:prometheus "${TEMP_FILE}" 2>/dev/null || true -mv "${TEMP_FILE}" "${METRICS_FILE}" \ No newline at end of file +mv "${TEMP_FILE}" "${METRICS_FILE}" diff --git a/ansible/certbot/deploy.yml b/ansible/certbot/deploy.yml index 38680cd..dec536d 100644 --- a/ansible/certbot/deploy.yml +++ b/ansible/certbot/deploy.yml @@ -2,14 +2,23 @@ # ----------------------------------------------------------------------------- # Certbot Deployment Playbook # ----------------------------------------------------------------------------- -# Deploys certbot with Namecheap DNS-01 validation for wildcard certificates -# Host: hippocamp.helu.ca (OCI HAProxy instance) +# Deploys certbot with Namecheap DNS-01 validation and requests certificates. +# Reusable across all certbot hosts (horkos, bootes).
+# +# Supports two host_vars patterns: +# Single-cert: certbot_cert_name + certbot_domains (horkos) +# Multi-cert: certbot_certificates list (bootes) # # Secrets are fetched automatically from OCI Vault via group_vars/all/secrets.yml +# +# Usage: +# ansible-playbook certbot/deploy.yml --limit horkos.helu.ca +# ansible-playbook certbot/deploy.yml --limit bootes.helu.ca # ----------------------------------------------------------------------------- - name: Deploy Certbot with Namecheap DNS-01 Validation hosts: ubuntu + gather_facts: false vars: ansible_common_remote_group: "{{ certbot_group | default(omit) }}" allow_world_readable_tmpfiles: true @@ -32,6 +41,16 @@ ansible.builtin.meta: end_host when: not has_certbot_service + # ------------------------------------------------------------------------- + # Build Unified Certificate List + # ------------------------------------------------------------------------- + + - name: Build unified certificate list from host_vars + ansible.builtin.set_fact: + _certbot_certs: >- + {{ certbot_certificates + | default([{'cert_name': certbot_cert_name, 'domains': certbot_domains}]) }} + # ------------------------------------------------------------------------- # System Setup # ------------------------------------------------------------------------- @@ -53,10 +72,17 @@ home: "{{ certbot_directory }}" create_home: false - - name: Add keeper_user to certbot group + - name: Add certbot user to ponos group become: true ansible.builtin.user: - name: "{{ keeper_user }}" + name: "{{ certbot_user }}" + groups: ponos + append: true + + - name: Add ponos user to certbot group + become: true + ansible.builtin.user: + name: ponos groups: "{{ certbot_group }}" append: true @@ -80,32 +106,6 @@ - "{{ certbot_directory }}/credentials" - "{{ certbot_directory }}/hooks" - - name: Create haproxy group for certificate directory - become: true - ansible.builtin.group: - name: "{{ haproxy_group | default('haproxy') }}" - system: true - - - name: Create 
haproxy user for certificate directory - become: true - ansible.builtin.user: - name: "{{ haproxy_user | default('haproxy') }}" - comment: "HAProxy Load Balancer" - group: "{{ haproxy_group | default('haproxy') }}" - system: true - shell: /usr/sbin/nologin - home: /nonexistent - create_home: false - - - name: Create certificate output directory - become: true - ansible.builtin.file: - path: /etc/haproxy/certs - owner: "{{ certbot_user }}" - group: "{{ haproxy_group | default('haproxy') }}" - state: directory - mode: '0750' - # ------------------------------------------------------------------------- # Python Virtual Environment # ------------------------------------------------------------------------- @@ -116,6 +116,7 @@ name: - python3-venv - python3-pip + - acl state: present update_cache: true @@ -125,50 +126,46 @@ ansible.builtin.command: python3 -m venv {{ certbot_directory }}/.venv args: creates: "{{ certbot_directory }}/.venv/bin/activate" - vars: - ansible_common_remote_group: "{{ certbot_group }}" - allow_world_readable_tmpfiles: true - - - name: Upgrade pip in virtualenv - become: true - become_user: "{{ certbot_user }}" - ansible.builtin.pip: - name: pip - state: latest - virtualenv: "{{ certbot_directory }}/.venv" - vars: - ansible_common_remote_group: "{{ certbot_group }}" - allow_world_readable_tmpfiles: true - name: Install certbot and Namecheap DNS plugin become: true become_user: "{{ certbot_user }}" ansible.builtin.pip: name: + - pip - certbot - certbot-dns-namecheap state: present virtualenv: "{{ certbot_directory }}/.venv" - vars: - ansible_common_remote_group: "{{ certbot_group }}" - allow_world_readable_tmpfiles: true + + - name: Install OCI CLI in certbot venv (vault upload hosts) + become: true + become_user: "{{ certbot_user }}" + ansible.builtin.pip: + name: + - oci-cli + state: present + virtualenv: "{{ certbot_directory }}/.venv" + when: certbot_vault_upload | default(false) # 
------------------------------------------------------------------------- # Namecheap Credentials # ------------------------------------------------------------------------- - - name: Get public IP for Namecheap API + - name: Get public IP for Namecheap API whitelisting ansible.builtin.uri: url: https://ifconfig.me/ip return_content: true register: public_ip_result - delegate_to: localhost - become: false - name: Set client IP fact ansible.builtin.set_fact: namecheap_client_ip: "{{ public_ip_result.content | trim }}" + - name: Display public IP for Namecheap API whitelisting + ansible.builtin.debug: + msg: "Public IP: {{ namecheap_client_ip }} — ensure whitelisted at https://ap.www.namecheap.com/settings/tools/apiaccess/" + - name: Template Namecheap credentials become: true ansible.builtin.template: @@ -182,7 +179,7 @@ # Renewal Hooks # ------------------------------------------------------------------------- - - name: Template renewal hook script + - name: Template renewal hook script (HAProxy reload) become: true ansible.builtin.template: src: renewal-hook.sh.j2 @@ -190,6 +187,26 @@ owner: "{{ certbot_user }}" group: "{{ certbot_group }}" mode: '0750' + when: not (certbot_vault_upload | default(false)) + + - name: Template vault upload hook script + become: true + ansible.builtin.template: + src: vault-upload-hook.sh.j2 + dest: "{{ certbot_directory }}/hooks/renewal-hook.sh" + owner: "{{ certbot_user }}" + group: "{{ certbot_group }}" + mode: '0750' + when: certbot_vault_upload | default(false) + + - name: Create Prometheus textfile directory + become: true + ansible.builtin.file: + path: "{{ prometheus_node_exporter_text_directory }}" + state: directory + owner: root + group: root + mode: '0755' - name: Template certificate metrics script become: true @@ -201,20 +218,49 @@ mode: '0750' # ------------------------------------------------------------------------- - # Initial Certificate Request + # Certificate Requests # 
------------------------------------------------------------------------- - - name: Check if certificate already exists + - name: Check if certificates already exist become: true ansible.builtin.stat: - path: "{{ certbot_directory }}/config/live/{{ certbot_cert_name }}/fullchain.pem" - register: cert_exists + path: "{{ certbot_directory }}/config/live/{{ item.cert_name }}/fullchain.pem" + register: cert_check + loop: "{{ _certbot_certs }}" + loop_control: + label: "{{ item.cert_name }}" - - name: Build domain arguments for certbot + - name: Get current certificate domains + become: true + ansible.builtin.shell: | + set -euo pipefail + openssl x509 -in {{ certbot_directory }}/config/live/{{ item.item.cert_name }}/fullchain.pem \ + -noout -ext subjectAltName | \ + grep -oP 'DNS:\K[^,\s]+' | sort + args: + executable: /bin/bash + register: cert_domains + loop: "{{ cert_check.results }}" + when: item.stat.exists + changed_when: false + loop_control: + label: "{{ item.item.cert_name }}" + + - name: Determine which certificates need requesting ansible.builtin.set_fact: - certbot_domain_args: "{{ certbot_domains | map('regex_replace', '^', '-d ') | join(' ') }}" + _certs_to_request: >- + {{ _certs_to_request | default([]) + [ + item.0.item | combine({ + 'needs_request': not item.0.stat.exists, + 'domains_changed': item.0.stat.exists and + (item.1.stdout_lines | default([]) | sort) != (item.0.item.domains | sort) + }) + ] }} + loop: "{{ cert_check.results | zip_longest(cert_domains.results | default([]), fillvalue={}) | list }}" + loop_control: + label: "{{ item.0.item.cert_name }}" - - name: Request initial certificate + - name: Request certificates become: true become_user: "{{ certbot_user }}" ansible.builtin.shell: | @@ -229,17 +275,37 @@ --config-dir {{ certbot_directory }}/config \ --work-dir {{ certbot_directory }}/work \ --logs-dir {{ certbot_directory }}/logs \ - --cert-name {{ certbot_cert_name }} \ - {{ certbot_domain_args }} + --cert-name {{ item.cert_name }} \ 
+ {{ '--force-renewal' if item.domains_changed | default(false) else '' }} \ + {{ item.domains | map('regex_replace', '^', '-d ') | join(' ') }} args: executable: /bin/bash - when: not cert_exists.stat.exists - register: certbot_request + loop: "{{ _certs_to_request | selectattr('needs_request') | list + + _certs_to_request | selectattr('domains_changed') | list }}" + loop_control: + label: "{{ item.cert_name }}" + register: certbot_requests - - name: Run renewal hook after initial certificate + - name: Run renewal hook after certificate requests become: true ansible.builtin.command: "{{ certbot_directory }}/hooks/renewal-hook.sh" - when: certbot_request.changed + environment: >- + {{ {'RENEWED_LINEAGE': certbot_directory + '/config/live/' + item.item.cert_name} + if certbot_vault_upload | default(false) else {} }} + loop: "{{ certbot_requests.results | default([]) }}" + when: item.changed | default(false) + loop_control: + label: "{{ item.item.cert_name }}" + + - name: Ensure vault is populated with current certificates + become: true + ansible.builtin.command: "{{ certbot_directory }}/hooks/renewal-hook.sh" + environment: + RENEWED_LINEAGE: "{{ certbot_directory }}/config/live/{{ item.cert_name }}" + loop: "{{ _certbot_certs }}" + when: certbot_vault_upload | default(false) + loop_control: + label: "{{ item.cert_name }}" # ------------------------------------------------------------------------- # Systemd Timer for Auto-Renewal @@ -269,7 +335,7 @@ ansible.builtin.copy: content: | [Unit] - Description=Run certbot renewal twice daily + Description=Check certbot certificates and renew if expiring [Timer] OnCalendar=*-*-* 00,12:00:00 @@ -294,15 +360,6 @@ # Initial Metrics Update # ------------------------------------------------------------------------- - - name: Ensure prometheus textfile directory exists - become: true - ansible.builtin.file: - path: "{{ prometheus_node_exporter_text_directory }}" - state: directory - owner: prometheus - group: prometheus - mode: 
'0755' - - name: Run certificate metrics script become: true ansible.builtin.command: "{{ certbot_directory }}/hooks/cert-metrics.sh" @@ -312,12 +369,30 @@ # Verification # ------------------------------------------------------------------------- - - name: Verify certificate exists + - name: Verify certificates exist become: true ansible.builtin.stat: - path: "{{ haproxy_cert_path }}" - register: final_cert + path: "{{ certbot_directory }}/config/live/{{ item.cert_name }}/fullchain.pem" + register: final_certs + loop: "{{ _certbot_certs }}" + loop_control: + label: "{{ item.cert_name }}" - name: Certificate deployment status ansible.builtin.debug: - msg: "Certificate deployed: {{ final_cert.stat.exists }}" \ No newline at end of file + msg: "{{ item.item.cert_name }}: {{ 'deployed' if item.stat.exists else 'MISSING' }}" + loop: "{{ final_certs.results }}" + loop_control: + label: "{{ item.item.cert_name }}" + + - name: Verify HAProxy combined PEM exists + become: true + ansible.builtin.stat: + path: "{{ haproxy_cert_path }}" + register: _haproxy_pem + when: haproxy_cert_path is defined + + - name: HAProxy PEM status + ansible.builtin.debug: + msg: "HAProxy PEM {{ haproxy_cert_path }}: {{ 'present' if _haproxy_pem.stat.exists else 'MISSING — renewal hook may have failed' }}" + when: haproxy_cert_path is defined and _haproxy_pem is defined diff --git a/ansible/certbot/namecheap.ini.j2 b/ansible/certbot/namecheap.ini.j2 index 6879c20..9b4270c 100644 --- a/ansible/certbot/namecheap.ini.j2 +++ b/ansible/certbot/namecheap.ini.j2 @@ -5,4 +5,4 @@ dns_namecheap_username = {{ namecheap_username }} dns_namecheap_api_key = {{ namecheap_api_key }} {% if namecheap_client_ip is defined %} dns_namecheap_client_ip = {{ namecheap_client_ip }} -{% endif %} \ No newline at end of file +{% endif %} diff --git a/ansible/certbot/renewal-hook.sh.j2 b/ansible/certbot/renewal-hook.sh.j2 index 641f76c..0c0bb23 100644 --- a/ansible/certbot/renewal-hook.sh.j2 +++ 
b/ansible/certbot/renewal-hook.sh.j2 @@ -5,7 +5,7 @@ # This script: # 1. Combines fullchain.pem + privkey.pem into HAProxy format # 2. Sets correct permissions -# 3. Reloads HAProxy via Docker +# 3. Reloads HAProxy via systemd # 4. Updates certificate metrics for Prometheus set -euo pipefail @@ -37,10 +37,9 @@ chmod 640 "${HAPROXY_CERT}" echo "[$(date '+%Y-%m-%d %H:%M:%S')] Certificate combined and written to ${HAPROXY_CERT}" # Reload HAProxy if running -if docker ps --format '{{ '{{' }}.Names{{ '}}' }}' | grep -q haproxy; then +if systemctl is-active --quiet haproxy; then echo "[$(date '+%Y-%m-%d %H:%M:%S')] Reloading HAProxy..." - cd "${HAPROXY_DIR}" - docker compose kill -s HUP haproxy || docker-compose kill -s HUP haproxy + systemctl reload haproxy echo "[$(date '+%Y-%m-%d %H:%M:%S')] HAProxy reloaded" else echo "[$(date '+%Y-%m-%d %H:%M:%S')] HAProxy not running, skipping reload" @@ -49,4 +48,4 @@ fi # Update certificate metrics {{ certbot_directory }}/hooks/cert-metrics.sh -echo "[$(date '+%Y-%m-%d %H:%M:%S')] Renewal hook completed successfully" \ No newline at end of file +echo "[$(date '+%Y-%m-%d %H:%M:%S')] Renewal hook completed successfully"
diff --git a/ansible/certbot/vault-certs.yml b/ansible/certbot/vault-certs.yml
new file mode 100644
index 0000000..f2d586e
--- /dev/null
+++ b/ansible/certbot/vault-certs.yml
@@ -0,0 +1,350 @@
+---
+# -----------------------------------------------------------------------------
+# Vault Certificate Management Playbook
+# -----------------------------------------------------------------------------
+# Checks certificate validity in OCI Vault and renews expired/expiring
+# certificates via certbot on bootes. Designed for internal hosts that
+# don't have public IPs — certs are stored in OCI Vault for distribution.
+#
+# Run from fornax:
+#   ansible-playbook certbot/vault-certs.yml
+#
+# Steps:
+#   1. Validate vault secrets exist and are readable
+#   2. Display public IP for Namecheap API whitelisting verification
+#   3. Check certificate validity (PEM format + 30-day expiry window)
+#   4. Request certificates for any that need renewal (conditional)
+#   5. Post-validate renewed certificates in vault (conditional)
+#   6. Verify renewal schedule is active
+#
+# Prerequisites:
+#   - certbot/deploy.yml has been run on bootes (certbot is installed)
+#   - certbot/vault-validate.yml has been run (vault R/W confirmed)
+#   - Namecheap API IP is whitelisted
+# -----------------------------------------------------------------------------
+
+- name: Manage Internal Host Certificates via OCI Vault
+  hosts: bootes.helu.ca
+  gather_facts: false
+  tags: [certbot, vault, certs]
+
+  tasks:
+    # -------------------------------------------------------------------------
+    # Derive Certificate List from Host Vars
+    # -------------------------------------------------------------------------
+
+    - name: Build certificate prefix list from host_vars
+      ansible.builtin.set_fact:
+        _cert_prefixes: "{{ certbot_certificates | map(attribute='cert_name') | map('replace', '.', '-') | list }}"
+        _secret_suffixes: [fullchain, privkey]
+
+    - name: Build list of all vault secret names
+      ansible.builtin.set_fact:
+        _all_secret_names: "{{ _cert_prefixes | product(_secret_suffixes) | map('join', '-') | list }}"
+
+    # -------------------------------------------------------------------------
+    # Step 1: Validate Vault Secrets Exist
+    # -------------------------------------------------------------------------
+
+    - name: "Step 1 — Read vault secrets"
+      ansible.builtin.set_fact:
+        "_vault_{{ item | replace('-', '_') }}": "{{ lookup('oci_secret', item, vault_id=oci_vault_id) }}"
+      loop: "{{ _all_secret_names }}"
+      loop_control:
+        label: "{{ item }}"
+      no_log: true  # the values include private keys; keep them out of task output
+
+    - name: "Step 1 — Verify all secrets are readable and non-empty"
+      ansible.builtin.assert:
+        that:
+          - lookup('vars', '_vault_' + item | replace('-', '_')) is defined
+          - lookup('vars', '_vault_' + item | replace('-', '_')) | length > 0
+        fail_msg: "Secret '{{ item }}' is missing or empty in OCI Vault"
+        success_msg: "{{ item }}"
+      loop: "{{ _all_secret_names }}"
+      loop_control:
+        label: "{{ item }}"
+
+    - name: "Step 1 — Summary"
+      ansible.builtin.debug:
+        msg: "All {{ _all_secret_names | length }} vault secrets exist and are readable."
+
+    # -------------------------------------------------------------------------
+    # Step 2: IP Whitelisting Check
+    # -------------------------------------------------------------------------
+
+    - name: "Step 2 — Get public IP for Namecheap API"
+      ansible.builtin.uri:
+        url: https://ifconfig.me/ip
+        return_content: true
+      register: _public_ip_result
+
+    - name: "Step 2 — Display public IP for verification"
+      ansible.builtin.debug:
+        msg: >-
+          Public IP: {{ _public_ip_result.content | trim }}
+          — Ensure whitelisted at https://ap.www.namecheap.com/settings/tools/apiaccess/
+
+    # -------------------------------------------------------------------------
+    # Step 3: Certificate Validity Check
+    # -------------------------------------------------------------------------
+
+    - name: Create temporary directory for certificate validation
+      ansible.builtin.tempfile:
+        state: directory
+        prefix: vault-certs-
+      register: _validate_tmpdir
+
+    - name: "Step 3 — Write fullchain PEMs to temp files"
+      ansible.builtin.copy:
+        content: "{{ lookup('vars', ('_vault_' + item | replace('.', '-') + '-fullchain') | replace('-', '_')) }}"
+        dest: "{{ _validate_tmpdir.path }}/{{ item }}.pem"
+        mode: '0600'
+      loop: "{{ certbot_certificates | map(attribute='cert_name') | list }}"
+      loop_control:
+        label: "{{ item }}"
+
+    - name: "Step 3 — Check PEM format"
+      ansible.builtin.command:
+        cmd: openssl x509 -noout -in {{ _validate_tmpdir.path }}/{{ item }}.pem
+      loop: "{{ certbot_certificates | map(attribute='cert_name') | list }}"
+      loop_control:
+        label: "{{ item }}"
+      register: _pem_check
+      changed_when: false
+      failed_when: false
+
+    - name: "Step 3 — Check certificate expiry (30-day window)"
+      ansible.builtin.command:
+        cmd: openssl x509 -checkend 2592000 -noout -in {{ _validate_tmpdir.path }}/{{ item }}.pem
+      loop: "{{ certbot_certificates | map(attribute='cert_name') | list }}"
+      loop_control:
+        label: "{{ item }}"
+      register: _expiry_check
+      changed_when: false
+      failed_when: false
+
+    - name: "Step 3 — Get certificate details"
+      ansible.builtin.command:
+        cmd: openssl x509 -noout -subject -enddate -in {{ _validate_tmpdir.path }}/{{ item }}.pem
+      loop: "{{ certbot_certificates | map(attribute='cert_name') | list }}"
+      loop_control:
+        label: "{{ item }}"
+      register: _cert_details
+      changed_when: false
+      failed_when: false
+
+    - name: "Step 3 — Build renewal status"
+      ansible.builtin.set_fact:
+        _certs_needing_renewal: >-
+          {{ _certs_needing_renewal | default([]) +
+             ([certbot_certificates[idx]]
+              if _pem_check.results[idx].rc != 0 or _expiry_check.results[idx].rc != 0
+              else []) }}
+      loop: "{{ certbot_certificates | map(attribute='cert_name') | list }}"
+      loop_control:
+        index_var: idx
+        label: "{{ item }}"
+
+    - name: "Step 3 — Display certificate status"
+      ansible.builtin.debug:
+        msg: >-
+          {{ item.item }}:
+          {{ 'INVALID PEM' if item.rc != 0 else
+             (_cert_details.results[idx].stdout | default('unknown')) +
+             (' — NEEDS RENEWAL' if _expiry_check.results[idx].rc != 0 else ' — valid') }}
+      loop: "{{ _pem_check.results }}"
+      loop_control:
+        index_var: idx
+        label: "{{ item.item }}"
+
+    - name: "Step 3 — Summary"
+      ansible.builtin.debug:
+        msg: >-
+          {{ (_certs_needing_renewal | default([]) | length == 0)
+             | ternary(
+               'All ' + (certbot_certificates | length | string) + ' certificates are valid. Skipping renewal.',
+               (_certs_needing_renewal | default([]) | length | string) + ' certificate(s) need renewal: ' +
+               (_certs_needing_renewal | default([]) | map(attribute='cert_name') | join(', '))
+             ) }}
+
+    - name: Clean up validation temp directory
+      ansible.builtin.file:
+        path: "{{ _validate_tmpdir.path }}"
+        state: absent
+
+    # -------------------------------------------------------------------------
+    # Step 4: Request Certificates (conditional — only if needed)
+    # -------------------------------------------------------------------------
+
+    - name: "Step 4 — Request certificates that need renewal"
+      become: true
+      become_user: "{{ certbot_user }}"
+      ansible.builtin.shell: |
+        source {{ certbot_directory }}/.venv/bin/activate
+        certbot certonly \
+          --non-interactive \
+          --agree-tos \
+          --email {{ certbot_email }} \
+          --authenticator dns-namecheap \
+          --dns-namecheap-credentials {{ certbot_directory }}/credentials/namecheap.ini \
+          --dns-namecheap-propagation-seconds 120 \
+          --config-dir {{ certbot_directory }}/config \
+          --work-dir {{ certbot_directory }}/work \
+          --logs-dir {{ certbot_directory }}/logs \
+          --cert-name {{ item.cert_name }} \
+          --force-renewal \
+          {{ item.domains | map('regex_replace', '^', '-d ') | join(' ') }}
+      args:
+        executable: /bin/bash
+      loop: "{{ _certs_needing_renewal | default([]) }}"
+      loop_control:
+        label: "{{ item.cert_name }}"
+      register: _certbot_renewals
+      when: _certs_needing_renewal | default([]) | length > 0
+
+    - name: "Step 4 — Upload renewed certificates to vault"
+      become: true
+      ansible.builtin.command: "{{ certbot_directory }}/hooks/renewal-hook.sh"
+      environment:
+        RENEWED_LINEAGE: "{{ certbot_directory }}/config/live/{{ item.item.cert_name }}"
+      loop: "{{ _certbot_renewals.results | default([]) }}"
+      when: item.changed | default(false)
+      loop_control:
+        label: "{{ item.item.cert_name }}"
+
+    - name: "Step 4 — Update certificate metrics"
+      become: true
+      ansible.builtin.command: "{{ certbot_directory }}/hooks/cert-metrics.sh"
+      changed_when: false
+      when: _certs_needing_renewal | default([]) | length > 0
+
+    # -------------------------------------------------------------------------
+    # Step 5: Post-Validation (conditional — only after renewal)
+    # -------------------------------------------------------------------------
+
+    - name: "Step 5 — Re-read vault secrets after renewal"
+      ansible.builtin.set_fact:
+        "_post_{{ item | replace('-', '_') }}": "{{ lookup('oci_secret', item, vault_id=oci_vault_id) }}"
+      loop: "{{ _all_secret_names }}"
+      loop_control:
+        label: "{{ item }}"
+      no_log: true  # the values include private keys; keep them out of task output
+      when: _certs_needing_renewal | default([]) | length > 0
+
+    - name: Create post-validation temp directory
+      ansible.builtin.tempfile:
+        state: directory
+        prefix: vault-post-
+      register: _post_tmpdir
+      when: _certs_needing_renewal | default([]) | length > 0
+
+    - name: "Step 5 — Write renewed fullchain PEMs"
+      ansible.builtin.copy:
+        content: "{{ lookup('vars', ('_post_' + item | replace('.', '-') + '-fullchain') | replace('-', '_')) }}"
+        dest: "{{ _post_tmpdir.path }}/{{ item }}-fullchain.pem"
+        mode: '0600'
+      loop: "{{ _certs_needing_renewal | default([]) | map(attribute='cert_name') | list }}"
+      loop_control:
+        label: "{{ item }}"
+      when: _certs_needing_renewal | default([]) | length > 0
+
+    - name: "Step 5 — Write renewed privkey PEMs"
+      ansible.builtin.copy:
+        content: "{{ lookup('vars', ('_post_' + item | replace('.', '-') + '-privkey') | replace('-', '_')) }}"
+        dest: "{{ _post_tmpdir.path }}/{{ item }}-privkey.pem"
+        mode: '0600'
+      loop: "{{ _certs_needing_renewal | default([]) | map(attribute='cert_name') | list }}"
+      loop_control:
+        label: "{{ item }}"
+      no_log: true
+      when: _certs_needing_renewal | default([]) | length > 0
+
+    - name: "Step 5 — Verify renewed certificates are valid and not expiring"
+      ansible.builtin.command:
+        cmd: openssl x509 -checkend 2592000 -noout -in {{ _post_tmpdir.path }}/{{ item }}-fullchain.pem
+      loop: "{{ _certs_needing_renewal | default([]) | map(attribute='cert_name') | list }}"
+      loop_control:
+        label: "{{ item }}"
+      changed_when: false
+      when: _certs_needing_renewal | default([]) | length > 0
+
+    - name: "Step 5 — Verify cert/key public keys match"
+      ansible.builtin.shell: |
+        set -euo pipefail
+        # Compare public keys instead of RSA moduli: an RSA-only modulus check
+        # can never succeed for ECDSA keys, while this works for any key type.
+        cert_pub=$(openssl x509 -noout -pubkey -in "{{ _post_tmpdir.path }}/{{ item }}-fullchain.pem" | openssl sha256)
+        key_pub=$(openssl pkey -pubout -in "{{ _post_tmpdir.path }}/{{ item }}-privkey.pem" | openssl sha256)
+        if [[ "${cert_pub}" != "${key_pub}" ]]; then
+          echo "MISMATCH: cert=${cert_pub} key=${key_pub}" >&2
+          exit 1
+        fi
+        echo "OK: public keys match"
+      args:
+        executable: /bin/bash
+      loop: "{{ _certs_needing_renewal | default([]) | map(attribute='cert_name') | list }}"
+      loop_control:
+        label: "{{ item }}"
+      changed_when: false
+      when: _certs_needing_renewal | default([]) | length > 0
+
+    - name: "Step 5 — Get renewed certificate expiry dates"
+      ansible.builtin.command:
+        cmd: openssl x509 -noout -subject -enddate -in {{ _post_tmpdir.path }}/{{ item }}-fullchain.pem
+      loop: "{{ _certs_needing_renewal | default([]) | map(attribute='cert_name') | list }}"
+      loop_control:
+        label: "{{ item }}"
+      register: _post_cert_details
+      changed_when: false
+      when: _certs_needing_renewal | default([]) | length > 0
+
+    - name: "Step 5 — Display renewed certificate status"
+      ansible.builtin.debug:
+        msg: "{{ item.item }}: {{ item.stdout }}"
+      loop: "{{ _post_cert_details.results | default([]) }}"
+      loop_control:
+        label: "{{ item.item }}"
+      when: _certs_needing_renewal | default([]) | length > 0
+
+    - name: Clean up post-validation temp directory
+      ansible.builtin.file:
+        path: "{{ _post_tmpdir.path }}"
+        state: absent
+      when: _post_tmpdir.path is defined
+
+    # -------------------------------------------------------------------------
+    # Step 6: Schedule Verification (always runs)
+    #
------------------------------------------------------------------------- + + - name: "Step 6 — Verify certbot-renew.timer is enabled" + become: true + ansible.builtin.systemd: + name: certbot-renew.timer + enabled: true + state: started + register: _timer_status + + - name: "Step 6 — Get timer status" + become: true + ansible.builtin.command: + cmd: systemctl show certbot-renew.timer --property=ActiveState,NextElapseUSecRealtime,LastTriggerUSec + register: _timer_details + changed_when: false + + - name: "Step 6 — Display timer status" + ansible.builtin.debug: + msg: "certbot-renew.timer: {{ _timer_details.stdout_lines | join(', ') }}" + + - name: "Step 6 — Update certificate metrics" + become: true + ansible.builtin.command: "{{ certbot_directory }}/hooks/cert-metrics.sh" + changed_when: false + + - name: Final summary + ansible.builtin.debug: + msg: >- + Vault certificate check complete. + {{ (certbot_certificates | length) }} certificates checked. + {{ (_certs_needing_renewal | default([]) | length) }} renewed. + Renewal timer is {{ _timer_status.status.ActiveState | default('active') }}. diff --git a/ansible/certbot/vault-upload-hook.sh.j2 b/ansible/certbot/vault-upload-hook.sh.j2 new file mode 100644 index 0000000..0a3f9ce --- /dev/null +++ b/ansible/certbot/vault-upload-hook.sh.j2 @@ -0,0 +1,115 @@ +#!/bin/bash +# Certbot post-renewal hook for OCI Vault upload +# Managed by Ansible - DO NOT EDIT MANUALLY +# +# This script uploads renewed certificates to OCI Vault so that +# fornax can distribute them to target hosts via Ansible. +# +# Uses Instance Principal authentication (no config file needed). +# Called by certbot --deploy-hook after each successful renewal. + +set -euo pipefail + +CERT_DIR="{{ certbot_directory }}/config/live" +LOG_PREFIX="[$(date '+%Y-%m-%d %H:%M:%S')] [vault-upload]" + +echo "${LOG_PREFIX} Starting vault upload hook" + +# RENEWED_LINEAGE is set by certbot to the path of the renewed cert +# e.g. 
/srv/certbot/config/live/bootes.helu.ca +if [[ -z "${RENEWED_LINEAGE:-}" ]]; then + echo "${LOG_PREFIX} ERROR: RENEWED_LINEAGE not set — not running under certbot?" + exit 1 +fi + +CERT_NAME=$(basename "${RENEWED_LINEAGE}") +FULLCHAIN="${RENEWED_LINEAGE}/fullchain.pem" +PRIVKEY="${RENEWED_LINEAGE}/privkey.pem" +OCI="{{ certbot_directory }}/.venv/bin/oci" +COMPARTMENT_ID="{{ oci_govern_compartment_id }}" +VAULT_ID="{{ oci_vault_id }}" + +# Convert dots to hyphens to match Terraform secret naming (e.g. pan.helu.ca → pan-helu-ca) +VAULT_PREFIX="${CERT_NAME//./-}" + +echo "${LOG_PREFIX} Processing certificate: ${CERT_NAME} (vault prefix: ${VAULT_PREFIX})" + +if [[ ! -f "${FULLCHAIN}" ]] || [[ ! -f "${PRIVKEY}" ]]; then + echo "${LOG_PREFIX} ERROR: Certificate files not found in ${RENEWED_LINEAGE}" + exit 1 +fi + +# Look up secret OCIDs by name (Terraform creates secrets named {domain-hyphens}-fullchain/-privkey) +lookup_secret_id() { + local secret_name="$1" + local result + if ! result=$(${OCI} vault secret list \ + --auth instance_principal \ + --compartment-id "${COMPARTMENT_ID}" \ + --vault-id "${VAULT_ID}" \ + --name "${secret_name}" \ + --lifecycle-state ACTIVE \ + --all \ + --query 'data[0].id' \ + --raw-output 2>&1); then + echo "${LOG_PREFIX} ERROR: OCI CLI failed looking up secret '${secret_name}': ${result}" >&2 + return 1 + fi + echo "${result}" +} + +FULLCHAIN_SECRET_ID=$(lookup_secret_id "${VAULT_PREFIX}-fullchain") || true +PRIVKEY_SECRET_ID=$(lookup_secret_id "${VAULT_PREFIX}-privkey") || true + +if [[ -z "${FULLCHAIN_SECRET_ID}" ]] || [[ "${FULLCHAIN_SECRET_ID}" == "null" ]] || \ + [[ -z "${PRIVKEY_SECRET_ID}" ]] || [[ "${PRIVKEY_SECRET_ID}" == "null" ]]; then + echo "${LOG_PREFIX} ERROR: Could not find vault secrets for ${VAULT_PREFIX} (fullchain=${FULLCHAIN_SECRET_ID:-missing}, privkey=${PRIVKEY_SECRET_ID:-missing})" + echo "${LOG_PREFIX} Ensure 'terraform apply' has been run on bootes_certificates.tf" + exit 1 +fi + +echo "${LOG_PREFIX} Found secret 
OCIDs for ${VAULT_PREFIX}" + +# Upload fullchain to OCI Vault +FULLCHAIN_B64=$(base64 -w 0 < "${FULLCHAIN}") +if ! upload_output=$(${OCI} vault secret update-base64 \ + --auth instance_principal \ + --secret-id "${FULLCHAIN_SECRET_ID}" \ + --secret-content-content "${FULLCHAIN_B64}" 2>&1); then + echo "${LOG_PREFIX} ERROR: Failed to upload fullchain for ${CERT_NAME}: ${upload_output}" + exit 1 +fi +echo "${LOG_PREFIX} Uploaded fullchain for ${CERT_NAME}" + +# Upload private key to OCI Vault +PRIVKEY_B64=$(base64 -w 0 < "${PRIVKEY}") +if ! upload_output=$(${OCI} vault secret update-base64 \ + --auth instance_principal \ + --secret-id "${PRIVKEY_SECRET_ID}" \ + --secret-content-content "${PRIVKEY_B64}" 2>&1); then + echo "${LOG_PREFIX} ERROR: Failed to upload privkey for ${CERT_NAME}: ${upload_output}" + exit 1 +fi +echo "${LOG_PREFIX} Uploaded privkey for ${CERT_NAME}" + +{% if certbot_local_cert_name is defined %} +# Also combine cert for local HAProxy if this is the local cert +if [[ "${CERT_NAME}" == "{{ certbot_local_cert_name }}" ]]; then + echo "${LOG_PREFIX} Combining local cert for HAProxy: ${CERT_NAME}" + HAPROXY_CERT="{{ haproxy_cert_path }}" + cat "${FULLCHAIN}" "${PRIVKEY}" > "${HAPROXY_CERT}.tmp" + mv "${HAPROXY_CERT}.tmp" "${HAPROXY_CERT}" + chown {{ certbot_user }}:{{ haproxy_group }} "${HAPROXY_CERT}" + chmod 640 "${HAPROXY_CERT}" + + if systemctl is-active --quiet haproxy; then + echo "${LOG_PREFIX} Reloading HAProxy..." 
+        systemctl reload haproxy
+    fi
+fi
+{% endif %}
+
+# Update certificate metrics
+{{ certbot_directory }}/hooks/cert-metrics.sh
+
+echo "${LOG_PREFIX} Vault upload hook completed successfully"
diff --git a/ansible/certbot/vault-validate.yml b/ansible/certbot/vault-validate.yml
new file mode 100644
index 0000000..b7d0bf4
--- /dev/null
+++ b/ansible/certbot/vault-validate.yml
@@ -0,0 +1,356 @@
+---
+# -----------------------------------------------------------------------------
+# Vault Secret Validation Playbook
+# -----------------------------------------------------------------------------
+# Tests the full round-trip of OCI Vault secret read/write for all certbot
+# domains. Use this BEFORE running certbot to verify vault connectivity
+# and permissions without burning Let's Encrypt rate limits.
+#
+# Run from fornax:
+#   ansible-playbook certbot/vault-validate.yml
+#
+# What it does:
+#   1. Verifies every expected vault secret exists and is readable
+#   2. Writes a unique test value to each secret
+#   3. Reads back and compares to confirm the write path works
+#   4. Restores the original content (always, even if step 2 or 3 fails)
+#   5. Validates the stored certificates and their matching private keys
+# -----------------------------------------------------------------------------
+
+- name: Validate OCI Vault certificate secrets
+  hosts: localhost
+  gather_facts: false
+  environment:
+    PATH: "/srv/ponos/.local/bin:/usr/local/bin:/usr/bin:/bin"
+    HOME: "/srv/ponos"
+  vars:
+    # Must match the keys in taurus/terraform/bootes_certificates.tf
+    cert_prefixes:
+      - apollo-helu-ca
+      - bootes-helu-ca
+      - corvus-helu-ca
+      - draco-helu-ca
+      - iris-helu-ca
+      - korax-helu-ca
+      - nyx-helu-ca
+      - orpheus-helu-ca
+      - pan-helu-ca
+      - perseus-helu-ca
+      - wildcard-ouranos-helu-ca
+    secret_suffixes:
+      - fullchain
+      - privkey
+
+  tasks:
+    # -----------------------------------------------------------------
+    # Phase 1: Verify all secrets exist and are readable
+    # -----------------------------------------------------------------
+    - name: Build list of all secret names
+      ansible.builtin.set_fact:
+        all_secret_names: "{{ cert_prefixes | product(secret_suffixes) | map('join', '-') | list }}"
+
+    # no_log: half of these values are private keys; without it the task
+    # result would print every key in the play output.
+    - name: "Phase 1 — Read current value from vault"
+      ansible.builtin.set_fact:
+        "original_{{ item | replace('-', '_') }}": "{{ lookup('oci_secret', item, vault_id=oci_vault_id) }}"
+      loop: "{{ all_secret_names }}"
+      register: phase1_read
+      no_log: true
+
+    - name: "Phase 1 — Confirm all secrets are readable"
+      ansible.builtin.assert:
+        that:
+          - lookup('vars', 'original_' + item | replace('-', '_')) is defined
+          - lookup('vars', 'original_' + item | replace('-', '_')) | length > 0
+        fail_msg: "Secret '{{ item }}' is missing or empty in OCI Vault"
+        success_msg: "✓ {{ item }}"
+      loop: "{{ all_secret_names }}"
+      loop_control:
+        label: "{{ item }}"
+
+    - name: "Phase 1 — Summary"
+      ansible.builtin.debug:
+        msg: "All {{ all_secret_names | length }} vault secrets exist and are readable."
+
+    # -----------------------------------------------------------------
+    # Phase 2: Look up secret OCIDs (needed for write operations)
+    # -----------------------------------------------------------------
+    - name: "Phase 2 — Look up secret OCID for each secret"
+      ansible.builtin.command:
+        argv:
+          - oci
+          - vault
+          - secret
+          - list
+          - --compartment-id
+          - "{{ oci_govern_compartment_id }}"
+          - --vault-id
+          - "{{ oci_vault_id }}"
+          - --name
+          - "{{ item }}"
+          - --lifecycle-state
+          - ACTIVE
+          - --query
+          - "data[0].id"
+          - --raw-output
+      loop: "{{ all_secret_names }}"
+      loop_control:
+        label: "{{ item }}"
+      register: ocid_lookups
+      changed_when: false
+
+    - name: "Phase 2 — Build secret OCID map"
+      ansible.builtin.set_fact:
+        secret_ocids: "{{ secret_ocids | default({}) | combine({item.item: item.stdout | trim}) }}"
+      loop: "{{ ocid_lookups.results }}"
+      loop_control:
+        label: "{{ item.item }}"
+
+    - name: "Phase 2 — Verify all OCIDs resolved"
+      ansible.builtin.assert:
+        that:
+          - secret_ocids[item] is defined
+          - secret_ocids[item] | length > 0
+          - secret_ocids[item] != "null"
+        fail_msg: "Could not resolve OCID for secret '{{ item }}' — has terraform apply been run?"
+        success_msg: "✓ {{ item }}"
+      loop: "{{ all_secret_names }}"
+      loop_control:
+        label: "{{ item }}"
+
+    - name: "Phase 2 — Summary"
+      ansible.builtin.debug:
+        msg: "Resolved OCIDs for all {{ all_secret_names | length }} secrets."
+
+    # -----------------------------------------------------------------
+    # Phases 3-5: Write a test value, read it back, restore the original
+    #
+    # Phase 3 overwrites live certificate secrets, so the restore sits in
+    # an `always:` section: a failed write or readback can no longer leave
+    # test markers in place of production certificates and keys.
+    # -----------------------------------------------------------------
+    - name: Generate a unique test marker
+      ansible.builtin.set_fact:
+        test_marker: "vault-validate-{{ lookup('pipe', 'date +%Y%m%dT%H%M%S') }}"
+
+    - name: "Phases 3-5 — Round-trip test with guaranteed restore"
+      block:
+        - name: "Phase 3 — Write test value to each secret"
+          ansible.builtin.command:
+            argv:
+              - oci
+              - vault
+              - secret
+              - update-base64
+              - --secret-id
+              - "{{ secret_ocids[item] }}"
+              - --secret-content-content
+              - "{{ (test_marker + ':' + item) | b64encode }}"
+          loop: "{{ all_secret_names }}"
+          loop_control:
+            label: "{{ item }}"
+          register: phase3_write
+          changed_when: true
+
+        - name: "Phase 3 — Summary"
+          ansible.builtin.debug:
+            msg: "Wrote test values to {{ all_secret_names | length }} secrets."
+
+        - name: "Phase 4 — Read back each secret and verify"
+          ansible.builtin.assert:
+            that:
+              - readback == expected
+            fail_msg: >-
+              MISMATCH on {{ item }}:
+              expected '{{ expected }}'
+              got '{{ readback }}'
+            success_msg: "✓ {{ item }} round-trip OK"
+          vars:
+            expected: "{{ test_marker + ':' + item }}"
+            readback: "{{ lookup('oci_secret', item, vault_id=oci_vault_id) }}"
+          loop: "{{ all_secret_names }}"
+          loop_control:
+            label: "{{ item }}"
+
+        - name: "Phase 4 — Summary"
+          ansible.builtin.debug:
+            msg: "All {{ all_secret_names | length }} secrets passed round-trip validation."
+
+      always:
+        # no_log: the argv below carries the base64 of every private key.
+        - name: "Phase 5 — Restore original secret content"
+          ansible.builtin.command:
+            argv:
+              - oci
+              - vault
+              - secret
+              - update-base64
+              - --secret-id
+              - "{{ secret_ocids[item] }}"
+              - --secret-content-content
+              - "{{ lookup('vars', 'original_' + item | replace('-', '_')) | b64encode }}"
+          loop: "{{ all_secret_names }}"
+          loop_control:
+            label: "{{ item }}"
+          changed_when: true
+          no_log: true
+
+    - name: "Phase 5 — Summary"
+      ansible.builtin.debug:
+        msg: >-
+          Round-trip test passed — all {{ all_secret_names | length }} secrets verified.
+          Original content restored.
+
+    # -----------------------------------------------------------------
+    # Phase 6: Validate certificate content in vault
+    #
+    # Private keys are written to a temporary directory for the openssl
+    # checks; the `always:` section removes it even when a check fails.
+    # -----------------------------------------------------------------
+    - name: Create temporary directory for cert validation
+      ansible.builtin.tempfile:
+        state: directory
+        prefix: vault-validate-
+      register: validate_tmpdir
+
+    - name: "Phase 6 — Validate certificates and keys stored in vault"
+      block:
+        - name: "Phase 6 — Read fullchain secrets"
+          ansible.builtin.set_fact:
+            "fullchain_{{ item | replace('-', '_') }}": "{{ lookup('oci_secret', item + '-fullchain', vault_id=oci_vault_id) }}"
+          loop: "{{ cert_prefixes }}"
+          loop_control:
+            label: "{{ item }}"
+
+        - name: "Phase 6 — Check fullchain is PEM formatted"
+          ansible.builtin.assert:
+            that:
+              - lookup('vars', 'fullchain_' + item | replace('-', '_')) is search('-----BEGIN CERTIFICATE-----')
+              - lookup('vars', 'fullchain_' + item | replace('-', '_')) is search('-----END CERTIFICATE-----')
+            fail_msg: >-
+              {{ item }}-fullchain does not contain a PEM certificate.
+              Content starts with: {{ lookup('vars', 'fullchain_' + item | replace('-', '_'))[:60] }}
+            success_msg: "✓ {{ item }}-fullchain is PEM formatted"
+          loop: "{{ cert_prefixes }}"
+          loop_control:
+            label: "{{ item }}"
+
+        - name: "Phase 6 — Write fullchain to temp files for openssl validation"
+          ansible.builtin.copy:
+            content: "{{ lookup('vars', 'fullchain_' + item | replace('-', '_')) }}"
+            dest: "{{ validate_tmpdir.path }}/{{ item }}-fullchain.pem"
+            mode: '0600'
+          loop: "{{ cert_prefixes }}"
+          loop_control:
+            label: "{{ item }}"
+
+        - name: "Phase 6 — Validate certificate is not expired"
+          ansible.builtin.command:
+            argv:
+              - openssl
+              - x509
+              - -in
+              - "{{ validate_tmpdir.path }}/{{ item }}-fullchain.pem"
+              - -checkend
+              - "0"
+              - -noout
+          loop: "{{ cert_prefixes }}"
+          loop_control:
+            label: "{{ item }}"
+          register: cert_expiry_check
+          changed_when: false
+
+        - name: "Phase 6 — Get certificate details"
+          ansible.builtin.command:
+            argv:
+              - openssl
+              - x509
+              - -in
+              - "{{ validate_tmpdir.path }}/{{ item }}-fullchain.pem"
+              - -noout
+              - -subject
+              - -enddate
+          loop: "{{ cert_prefixes }}"
+          loop_control:
+            label: "{{ item }}"
+          register: cert_details
+          changed_when: false
+
+        - name: "Phase 6 — Display certificate status"
+          ansible.builtin.debug:
+            msg: "✓ {{ item.item }}: {{ item.stdout }}"
+          loop: "{{ cert_details.results }}"
+          loop_control:
+            label: "{{ item.item }}"
+
+        - name: "Phase 6 — Read privkey secrets"
+          ansible.builtin.set_fact:
+            "privkey_{{ item | replace('-', '_') }}": "{{ lookup('oci_secret', item + '-privkey', vault_id=oci_vault_id) }}"
+          loop: "{{ cert_prefixes }}"
+          loop_control:
+            label: "{{ item }}"
+          no_log: true
+
+        - name: "Phase 6 — Check privkey is PEM formatted"
+          ansible.builtin.assert:
+            that:
+              - lookup('vars', 'privkey_' + item | replace('-', '_')) is search('-----BEGIN .*(PRIVATE KEY)-----')
+              - lookup('vars', 'privkey_' + item | replace('-', '_')) is search('-----END .*(PRIVATE KEY)-----')
+            fail_msg: "{{ item }}-privkey does not contain a PEM private key"
+            success_msg: "✓ {{ item }}-privkey is PEM formatted"
+          loop: "{{ cert_prefixes }}"
+          loop_control:
+            label: "{{ item }}"
+
+        - name: "Phase 6 — Write privkey to temp files for key-pair check"
+          ansible.builtin.copy:
+            content: "{{ lookup('vars', 'privkey_' + item | replace('-', '_')) }}"
+            dest: "{{ validate_tmpdir.path }}/{{ item }}-privkey.pem"
+            mode: '0600'
+          loop: "{{ cert_prefixes }}"
+          loop_control:
+            label: "{{ item }}"
+          no_log: true
+
+        # Compare public keys rather than RSA moduli so the check works for
+        # both RSA and ECDSA key pairs.
+        - name: "Phase 6 — Verify private key matches certificate"
+          ansible.builtin.shell: |
+            set -euo pipefail
+            cert="{{ validate_tmpdir.path }}/{{ item }}-fullchain.pem"
+            key="{{ validate_tmpdir.path }}/{{ item }}-privkey.pem"
+            cert_pub=$(openssl x509 -noout -pubkey -in "${cert}" | openssl sha256)
+            key_pub=$(openssl pkey -pubout -in "${key}" | openssl sha256)
+            if [[ "${cert_pub}" != "${key_pub}" ]]; then
+              echo "MISMATCH: cert=${cert_pub} key=${key_pub}"
+              exit 1
+            fi
+            echo "OK: public key match"
+          args:
+            executable: /bin/bash
+          loop: "{{ cert_prefixes }}"
+          loop_control:
+            label: "{{ item }}"
+          register: keypair_check
+          changed_when: false
+
+        - name: "Phase 6 — Display key match results"
+          ansible.builtin.debug:
+            msg: "✓ {{ item.item }}: cert/key pair verified"
+          loop: "{{ keypair_check.results }}"
+          loop_control:
+            label: "{{ item.item }}"
+
+      always:
+        - name: Clean up temporary directory
+          ansible.builtin.file:
+            path: "{{ validate_tmpdir.path }}"
+            state: absent
+
+    - name: Final summary
+      ansible.builtin.debug:
+        msg: >-
+          Validation complete:
+          {{ all_secret_names | length }} vault secrets — read/write round-trip OK.
+          {{ cert_prefixes | length }} certificates — valid PEM, not expired, key pairs match.
diff --git a/ansible/haproxy/configure.yml b/ansible/haproxy/configure.yml
new file mode 100644
index 0000000..f8103af
--- /dev/null
+++ b/ansible/haproxy/configure.yml
@@ -0,0 +1,106 @@
+---
+# -----------------------------------------------------------------------------
+# HAProxy Configuration Playbook
+# -----------------------------------------------------------------------------
+# Templates haproxy.cfg and starts the HAProxy service. Must run AFTER both
+# haproxy/deploy.yml and certbot/deploy.yml so that:
+#   - The HAProxy package is installed
+#   - The real Let's Encrypt certificate exists at haproxy_cert_path
+#
+# Dependency chain:
+#   haproxy/deploy.yml    ← package + dirs
+#   certbot/deploy.yml    ← writes cert to /etc/haproxy/certs/
+#   haproxy/configure.yml ← this playbook (config + start)
+#
+# Hosts: horkos (public reverse proxy), bootes (internal HAProxy)
+# -----------------------------------------------------------------------------
+
+- name: Configure and start HAProxy
+  hosts: all
+  become: true
+  tags: [haproxy, service, configure]
+
+  handlers:
+    - name: reload haproxy
+      ansible.builtin.systemd:
+        name: haproxy
+        state: reloaded
+
+    - name: restart haproxy
+      ansible.builtin.systemd:
+        name: haproxy
+        state: restarted
+
+  tasks:
+    - name: Check if host has haproxy service
+      ansible.builtin.set_fact:
+        has_haproxy_service: "{{ 'haproxy' in services | default([]) }}"
+
+    - name: Skip hosts without haproxy service
+      ansible.builtin.meta: end_host
+      when: not has_haproxy_service
+
+    # -------------------------------------------------------------------------
+    # Certificate Check
+    # -------------------------------------------------------------------------
+
+    - name: Check if TLS certificate exists
+      ansible.builtin.stat:
+        path: "{{ haproxy_cert_path }}"
+      register: cert_file
+
+    - name: Fail if certificate is missing
+      ansible.builtin.fail:
+        msg: >
+          Certificate not found at {{ haproxy_cert_path }}.
+          Run certbot/deploy.yml before haproxy/configure.yml.
+          Command: ansible-playbook certbot/deploy.yml
+      when: not cert_file.stat.exists
+
+    # -------------------------------------------------------------------------
+    # Configuration
+    # -------------------------------------------------------------------------
+
+    - name: Template HAProxy configuration
+      ansible.builtin.template:
+        src: haproxy.cfg.j2
+        dest: /etc/haproxy/haproxy.cfg
+        owner: root
+        group: "{{ haproxy_group | default('haproxy') }}"
+        mode: '0640'
+        validate: "haproxy -c -f %s"
+      notify: reload haproxy
+
+    # -------------------------------------------------------------------------
+    # Service Management
+    # -------------------------------------------------------------------------
+
+    - name: Enable and start HAProxy service
+      ansible.builtin.systemd:
+        name: haproxy
+        enabled: true
+        state: started
+        daemon_reload: true
+
+    # Apply a pending reload now so the verification below exercises the
+    # configuration that was just templated, not the previously loaded one.
+    - name: Apply pending HAProxy reload before verification
+      ansible.builtin.meta: flush_handlers
+
+    # -------------------------------------------------------------------------
+    # Verification
+    # -------------------------------------------------------------------------
+
+    - name: Wait for HAProxy stats port to be ready
+      ansible.builtin.uri:
+        url: "http://localhost:{{ haproxy_stats_port }}/metrics"
+        method: GET
+        status_code: 200
+      register: haproxy_health
+      retries: 5
+      delay: 3
+      until: haproxy_health.status == 200
+
+    - name: HAProxy configuration status
+      ansible.builtin.debug:
+        msg: "HAProxy is running and serving metrics on port {{ haproxy_stats_port }}"
diff --git a/ansible/haproxy/deploy.yml b/ansible/haproxy/deploy.yml
index c45cad7..47edfeb 100644
--- a/ansible/haproxy/deploy.yml
+++ b/ansible/haproxy/deploy.yml
@@ -1,117 +1,83 @@
 ---
-- name: Deploy HAProxy
-  hosts: ubuntu
+# -----------------------------------------------------------------------------
+# HAProxy Deployment Playbook
+# -----------------------------------------------------------------------------
+# Installs HAProxy and creates the directory structure required by downstream
+# playbooks.
This playbook must run BEFORE certbot/deploy.yml so that the +# /etc/haproxy/certs directory exists with the correct haproxy group ownership +# when certbot writes the combined PEM file. +# +# Dependency chain: +# haproxy/deploy.yml ← this playbook (package + dirs) +# certbot/deploy.yml ← writes cert to /etc/haproxy/certs/ +# haproxy/configure.yml ← templates haproxy.cfg and starts the service +# +# Hosts: horkos (public reverse proxy), bootes (internal HAProxy) +# ----------------------------------------------------------------------------- + +- name: Deploy HAProxy (package and directory structure) + hosts: all + become: true + tags: [haproxy, service, deploy] + tasks: - name: Check if host has haproxy service - set_fact: - has_haproxy_service: "{{'haproxy' in services}}" + ansible.builtin.set_fact: + has_haproxy_service: "{{ 'haproxy' in services | default([]) }}" - name: Skip hosts without haproxy service - meta: end_host + ansible.builtin.meta: end_host when: not has_haproxy_service - - name: Create haproxy group - become: true - ansible.builtin.group: - name: "{{haproxy_group}}" - gid: "{{haproxy_gid}}" - system: true + # ------------------------------------------------------------------------- + # Install HAProxy + # ------------------------------------------------------------------------- - - name: Create haproxy user - become: true - ansible.builtin.user: - name: "{{haproxy_user}}" - comment: "{{haproxy_user}}" - group: "{{haproxy_group}}" - uid: "{{haproxy_uid}}" - system: true - - - name: Add group haproxy to keeper_user - become: true - ansible.builtin.user: - name: "{{keeper_user}}" - groups: "{{haproxy_group}}" - append: true - - - name: Create required directories - become: true - ansible.builtin.file: - path: "{{haproxy_directory}}" - owner: "{{haproxy_user}}" - group: "{{haproxy_group}}" - state: directory - mode: '750' - - - name: Create /etc/haproxy directory - become: true - ansible.builtin.file: - path: /etc/haproxy - owner: root - group: root 
- state: directory - mode: '755' - - - name: Create certs directory - become: true - ansible.builtin.file: - path: /etc/haproxy/certs - owner: "{{haproxy_user}}" - group: "{{haproxy_group}}" - state: directory - mode: '750' - - - name: Check if certificate already exists - become: true - stat: - path: "{{ haproxy_cert_path }}" - register: cert_file - - - name: Generate self-signed wildcard certificate - become: true - command: > - openssl req -x509 -nodes -days 365 -newkey rsa:2048 - -keyout {{ haproxy_cert_path }} - -out {{ haproxy_cert_path }} - -subj "/C=US/ST=State/L=City/O=Ouranos/CN=*.{{ haproxy_domain }}" - -addext "subjectAltName=DNS:*.{{ haproxy_domain }},DNS:{{ haproxy_domain }}" - when: not cert_file.stat.exists and 'certbot' not in services - - - name: Set certificate permissions - become: true - ansible.builtin.file: - path: "{{ haproxy_cert_path }}" - owner: "{{haproxy_user}}" - group: "{{haproxy_group}}" - mode: '640' - - - name: Install HAProxy - become: true + - name: Ensure HAProxy is installed ansible.builtin.apt: name: haproxy state: present update_cache: true - - name: Template HAProxy configuration - become: true - ansible.builtin.template: - src: "haproxy.cfg.j2" - dest: /etc/haproxy/haproxy.cfg - owner: "{{haproxy_user}}" - group: "{{haproxy_group}}" - mode: "640" - validate: haproxy -c -f %s - register: haproxy_config + # ------------------------------------------------------------------------- + # User / Group + # HAProxy's apt package creates the haproxy user/group, but we also need + # the certbot group to exist so that /etc/haproxy/certs can be group-owned + # by haproxy and written by certbot. 
+ # ------------------------------------------------------------------------- - - name: Enable and start HAProxy service - become: true - ansible.builtin.systemd: - name: haproxy - enabled: true - state: started + - name: Ensure haproxy group exists + ansible.builtin.group: + name: "{{ haproxy_group | default('haproxy') }}" + system: true - - name: Reload HAProxy if configuration changed - become: true - ansible.builtin.systemd: - name: haproxy - state: reloaded - when: haproxy_config.changed + - name: Ensure haproxy user exists + ansible.builtin.user: + name: "{{ haproxy_user | default('haproxy') }}" + group: "{{ haproxy_group | default('haproxy') }}" + system: true + shell: /usr/sbin/nologin + home: /nonexistent + create_home: false + + # ------------------------------------------------------------------------- + # Directory Structure + # /etc/haproxy/certs must exist with haproxy group ownership before certbot + # runs so that the renewal hook can write the combined PEM file there. + # ------------------------------------------------------------------------- + + - name: Ensure /etc/haproxy directory exists + ansible.builtin.file: + path: /etc/haproxy + owner: root + group: "{{ haproxy_group | default('haproxy') }}" + state: directory + mode: '0755' + + - name: Ensure /etc/haproxy/certs directory exists + ansible.builtin.file: + path: /etc/haproxy/certs + owner: "{{ certbot_user | default('certbot') }}" + group: "{{ haproxy_group | default('haproxy') }}" + state: directory + mode: '0750' diff --git a/ansible/haproxy/haproxy.cfg.j2 b/ansible/haproxy/haproxy.cfg.j2 index 02d7edb..ee6a1c6 100644 --- a/ansible/haproxy/haproxy.cfg.j2 +++ b/ansible/haproxy/haproxy.cfg.j2 @@ -1,9 +1,15 @@ -# HAProxy configuration for Ouranos Titania +# HAProxy configuration for Taurus Production Environment # Managed by Ansible - Red Panda Approved +# +# SSL: Let's Encrypt certificate for helu.ca subdomains +# HTTP backends: Casdoor (talos), Gitea (xenia), SearXNG (xenia) +# TCP 
backend: Gitea SSH (xenia) global - log 127.0.0.1:{{ haproxy_syslog_port }} local0 + log /dev/log local0 + log /dev/log local1 notice stats timeout 30s + # Ubuntu systemd service handles user/group and daemonization # Default SSL material locations ca-base /etc/ssl/certs @@ -38,29 +44,47 @@ listen stats # Prometheus metrics endpoint http-request use-service prometheus-exporter if { path /metrics } -# HTTP frontend - redirect all traffic to HTTPS +# HTTP to HTTPS redirect frontend http_frontend bind *:{{ haproxy_http_port }} mode http option httplog + + # Redirect all HTTP to HTTPS http-request redirect scheme https code 301 # HTTPS frontend with dynamic routing frontend https_frontend - bind *:{{ haproxy_https_port }} ssl crt {{ haproxy_cert_path }} + bind *:{{ haproxy_https_port }} ssl crt {{ haproxy_cert_path }} alpn h2,http/1.1 mode http option httplog option forwardfor - # Forward original protocol and host for reverse-proxied services - http-request set-header X-Forwarded-Proto https - http-request set-header X-Forwarded-Port %[dst_port] - # Security headers http-response set-header Strict-Transport-Security "max-age=31536000; includeSubDomains" http-response set-header X-Frame-Options "SAMEORIGIN" http-response set-header X-Content-Type-Options "nosniff" http-response set-header X-XSS-Protection "1; mode=block" + + # ------------------------------------------------------------------------- + # Rate limiting via stick-tables + # ------------------------------------------------------------------------- + # General rate limit: 1000 req/min per source IP + stick-table type ip size 100k expire 1m store http_req_rate(1m) + http-request track-sc0 src + + # Auth endpoint rate limit: 20 req/min per source IP + acl is_auth_endpoint path_beg /api/login /api/signup /api/get-captcha /login/oauth/authorize /api/login/oauth/access_token + acl host_id hdr_beg(host) -i id.{{ haproxy_domain }} + + # Use backend stick-table for auth endpoint tracking + http-request track-sc1 
src table st_casdoor_auth if host_id is_auth_endpoint + + # Deny if general rate exceeded + http-request deny deny_status 429 if { sc_http_req_rate(0) gt 1000 } + + # Deny if auth endpoint rate exceeded + http-request deny deny_status 429 if host_id is_auth_endpoint { sc_http_req_rate(1,st_casdoor_auth) gt 20 } {% for backend in haproxy_backends %} {% if backend.subdomain %} @@ -86,29 +110,37 @@ backend backend_root {% endif %} mode http balance roundrobin -{% if backend.ssl_backend | default(false) %} option httpchk - http-check send meth GET uri {{ backend.health_path }} hdr Host {{ backend.subdomain }}.{{ haproxy_domain }} -{% else %} - option httpchk GET {{ backend.health_path }} -{% endif %} + http-check send meth GET uri {{ backend.health_path }} ver HTTP/1.1 hdr Host {{ backend.health_host | default(backend.backend_host) }} http-check expect status 200 {% if backend.timeout_server is defined %} timeout server {{ backend.timeout_server }} {% endif %} - server {{ backend.subdomain or 'root' }}_1 {{ backend.backend_host }}:{{ backend.backend_port }} check{% if backend.ssl_backend | default(false) %} ssl verify none{% endif %} + server {{ backend.subdomain or 'root' }}_1 {{ backend.backend_host }}:{{ backend.backend_port }} check {% endfor %} + +# Stick-table for auth endpoint rate limiting (referenced by frontend) +backend st_casdoor_auth + stick-table type ip size 100k expire 1m store http_req_rate(1m) + +# ============================================================================= +# TCP Frontends/Backends (non-HTTP protocols) +# ============================================================================= + {% for tcp_backend in haproxy_tcp_backends | default([]) %} # TCP passthrough: {{ tcp_backend.name }} frontend {{ tcp_backend.name }}_frontend bind *:{{ tcp_backend.listen_port }} mode tcp option tcplog + timeout client 1h default_backend {{ tcp_backend.name }}_backend backend {{ tcp_backend.name }}_backend mode tcp + option tcp-check + timeout server 
1h server {{ tcp_backend.name }}_1 {{ tcp_backend.backend_host }}:{{ tcp_backend.backend_port }} check {% endfor %} diff --git a/ansible/inventory/group_vars/all/vars.yml b/ansible/inventory/group_vars/all/vars.yml index d265a2d..22c48f7 100644 --- a/ansible/inventory/group_vars/all/vars.yml +++ b/ansible/inventory/group_vars/all/vars.yml @@ -22,6 +22,7 @@ act_runner_version: "0.2.13" gitea_runner_instance_url: "https://gitea.ouranos.helu.ca" # Release versions for staging playbooks +agent_s_rel: master anythingllm_rel: master athena_rel: master athena_mcp_rel: master @@ -35,6 +36,7 @@ mcp_switchboard_rel: master kernos_rel: master # PyPI release version (no 'v' prefix) - https://pypi.org/project/open-webui/ openwebui_rel: 0.8.3 +pulseaudio_module_xrdp_rel: # MCP URLs argos_mcp_url: http://miranda.incus:25534/mcp diff --git a/ansible/inventory/host_vars/titania.incus.yml b/ansible/inventory/host_vars/titania.incus.yml index 3ab3847..0b5a44e 100644 --- a/ansible/inventory/host_vars/titania.incus.yml +++ b/ansible/inventory/host_vars/titania.incus.yml @@ -26,10 +26,10 @@ certbot_group: certbot certbot_directory: /srv/certbot certbot_email: webmaster@helu.ca certbot_cert_name: ouranos.helu.ca -certbot_domains: - - "*.ouranos.helu.ca" - - "ouranos.helu.ca" prometheus_node_exporter_text_directory: /var/lib/prometheus/node-exporter +certbot_certificates: + - cert_name: wildcard.ouranos.helu.ca + domains: ["*.ouranos.helu.ca"] # HAProxy Configuration haproxy_user: haproxy