feat: add Daedalus application configuration, database setup, and monitoring alerts
This commit is contained in:
@@ -111,6 +111,20 @@ loki.source.syslog "jupyterlab_logs" {
|
|||||||
forward_to = [loki.write.default.receiver]
|
forward_to = [loki.write.default.receiver]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
loki.source.syslog "daedalus_logs" {
|
||||||
|
listener {
|
||||||
|
address = "127.0.0.1:{{daedalus_syslog_port}}"
|
||||||
|
protocol = "tcp"
|
||||||
|
syslog_format = "{{ syslog_format }}"
|
||||||
|
labels = {
|
||||||
|
job = "daedalus",
|
||||||
|
hostname = "{{inventory_hostname}}",
|
||||||
|
environment = "{{deployment_environment}}",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
forward_to = [loki.write.default.receiver]
|
||||||
|
}
|
||||||
|
|
||||||
loki.write "default" {
|
loki.write "default" {
|
||||||
endpoint {
|
endpoint {
|
||||||
url = "{{loki_url}}"
|
url = "{{loki_url}}"
|
||||||
|
|||||||
@@ -240,6 +240,41 @@
|
|||||||
"expireInHours": 168,
|
"expireInHours": 168,
|
||||||
"formCss": "<style>.login-panel{background-color:#ffffff;border-radius:10px;box-shadow:0 0 30px 20px rgba(255,164,21,0.12)}.ant-btn-primary{background-color:#4b96ff!important;border-color:#4b96ff!important}.ant-btn-primary:hover{background-color:#58c0ff!important;border-color:#58c0ff!important}a{color:#ffa415}a:hover{color:#ffc219}</style>",
|
"formCss": "<style>.login-panel{background-color:#ffffff;border-radius:10px;box-shadow:0 0 30px 20px rgba(255,164,21,0.12)}.ant-btn-primary{background-color:#4b96ff!important;border-color:#4b96ff!important}.ant-btn-primary:hover{background-color:#58c0ff!important;border-color:#58c0ff!important}a{color:#ffa415}a:hover{color:#ffc219}</style>",
|
||||||
"footerHtml": "<div style=\"text-align:center;padding:10px;color:#666;\"><a href=\"https://helu.ca\" style=\"color:#4b96ff;text-decoration:none;\">Powered by Helu.ca</a></div>"
|
"footerHtml": "<div style=\"text-align:center;padding:10px;color:#666;\"><a href=\"https://helu.ca\" style=\"color:#4b96ff;text-decoration:none;\">Powered by Helu.ca</a></div>"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"owner": "admin",
|
||||||
|
"name": "daedalus",
|
||||||
|
"displayName": "Daedalus",
|
||||||
|
"logo": "https://helu.ca/media/images/helu-ca_logo.original.svg",
|
||||||
|
"homepageUrl": "https://daedalus.ouranos.helu.ca",
|
||||||
|
"organization": "heluca",
|
||||||
|
"cert": "cert-heluca",
|
||||||
|
"enablePassword": true,
|
||||||
|
"enableSignUp": false,
|
||||||
|
"clientId": "{{ vault_daedalus_oauth_client_id }}",
|
||||||
|
"clientSecret": "{{ vault_daedalus_oauth_client_secret }}",
|
||||||
|
"providers": [],
|
||||||
|
"signinMethods": [
|
||||||
|
{"name": "Password", "displayName": "Password", "rule": "All"}
|
||||||
|
],
|
||||||
|
"signupItems": [
|
||||||
|
{"name": "ID", "visible": false, "required": true, "prompted": false, "rule": "Random"},
|
||||||
|
{"name": "Email", "visible": true, "required": true, "prompted": false, "rule": "None"},
|
||||||
|
{"name": "Display name", "visible": true, "required": true, "prompted": false, "rule": "None"},
|
||||||
|
{"name": "Password", "visible": true, "required": true, "prompted": false, "rule": "None"},
|
||||||
|
{"name": "Confirm password", "visible": true, "required": true, "prompted": false, "rule": "None"}
|
||||||
|
],
|
||||||
|
"grantTypes": [
|
||||||
|
"authorization_code",
|
||||||
|
"refresh_token"
|
||||||
|
],
|
||||||
|
"redirectUris": [
|
||||||
|
"https://daedalus.ouranos.helu.ca/oauth/callback"
|
||||||
|
],
|
||||||
|
"tokenFormat": "JWT",
|
||||||
|
"expireInHours": 168,
|
||||||
|
"formCss": "<style>.login-panel{background-color:#ffffff;border-radius:10px;box-shadow:0 0 30px 20px rgba(255,164,21,0.12)}.ant-btn-primary{background-color:#4b96ff!important;border-color:#4b96ff!important}.ant-btn-primary:hover{background-color:#58c0ff!important;border-color:#58c0ff!important}a{color:#ffa415}a:hover{color:#ffc219}</style>",
|
||||||
|
"footerHtml": "<div style=\"text-align:center;padding:10px;color:#666;\"><a href=\"https://helu.ca\" style=\"color:#4b96ff;text-decoration:none;\">Powered by Helu.ca</a></div>"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"users": [
|
"users": [
|
||||||
|
|||||||
@@ -46,6 +46,9 @@ nike_db_password: "{{ vault_nike_db_password }}"
|
|||||||
periplus_db_name: periplus
|
periplus_db_name: periplus
|
||||||
periplus_db_user: periplus
|
periplus_db_user: periplus
|
||||||
periplus_db_password: "{{ vault_periplus_db_password }}"
|
periplus_db_password: "{{ vault_periplus_db_password }}"
|
||||||
|
daedalus_db_name: daedalus
|
||||||
|
daedalus_db_user: daedalus
|
||||||
|
daedalus_db_password: "{{ vault_daedalus_db_password }}"
|
||||||
|
|
||||||
# PostgreSQL admin password
|
# PostgreSQL admin password
|
||||||
postgres_password: "{{ vault_postgres_password }}"
|
postgres_password: "{{ vault_postgres_password }}"
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ kairos_syslog_port: 51451
|
|||||||
icarlos_syslog_port: 51461
|
icarlos_syslog_port: 51461
|
||||||
spelunker_syslog_port: 51481
|
spelunker_syslog_port: 51481
|
||||||
jupyterlab_syslog_port: 51491
|
jupyterlab_syslog_port: 51491
|
||||||
|
daedalus_syslog_port: 51501
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# JupyterLab Configuration
|
# JupyterLab Configuration
|
||||||
|
|||||||
@@ -119,6 +119,11 @@ haproxy_backends:
|
|||||||
backend_host: "rosalind.incus"
|
backend_host: "rosalind.incus"
|
||||||
backend_port: 22082
|
backend_port: 22082
|
||||||
health_path: "/api/healthz"
|
health_path: "/api/healthz"
|
||||||
|
|
||||||
|
- subdomain: "daedalus"
|
||||||
|
backend_host: "puck.incus"
|
||||||
|
# NOTE(review): docs/daedalus.md and the Prometheus scrape target both use puck.incus:22181 for the Nginx listener — confirm 23081 is the intended HAProxy backend port.
backend_port: 23081
|
||||||
|
health_path: "/api/health"
|
||||||
timeout_server: 120s
|
timeout_server: 120s
|
||||||
|
|
||||||
- subdomain: "lobechat"
|
- subdomain: "lobechat"
|
||||||
|
|||||||
@@ -202,6 +202,7 @@
|
|||||||
- { user: "{{ hass_db_user }}", password: "{{ hass_db_password }}" }
|
- { user: "{{ hass_db_user }}", password: "{{ hass_db_password }}" }
|
||||||
- { user: "{{ nike_db_user }}", password: "{{ nike_db_password }}" }
|
- { user: "{{ nike_db_user }}", password: "{{ nike_db_password }}" }
|
||||||
- { user: "{{ periplus_db_user }}", password: "{{ periplus_db_password }}" }
|
- { user: "{{ periplus_db_user }}", password: "{{ periplus_db_password }}" }
|
||||||
|
- { user: "{{ daedalus_db_user }}", password: "{{ daedalus_db_password }}" }
|
||||||
no_log: true
|
no_log: true
|
||||||
|
|
||||||
- name: Create application databases with owners
|
- name: Create application databases with owners
|
||||||
@@ -224,6 +225,7 @@
|
|||||||
- { name: "{{ hass_db_name }}", owner: "{{ hass_db_user }}" }
|
- { name: "{{ hass_db_name }}", owner: "{{ hass_db_user }}" }
|
||||||
- { name: "{{ nike_db_name }}", owner: "{{ nike_db_user }}" }
|
- { name: "{{ nike_db_name }}", owner: "{{ nike_db_user }}" }
|
||||||
- { name: "{{ periplus_db_name }}", owner: "{{ periplus_db_user }}" }
|
- { name: "{{ periplus_db_name }}", owner: "{{ periplus_db_user }}" }
|
||||||
|
- { name: "{{ daedalus_db_name }}", owner: "{{ daedalus_db_user }}" }
|
||||||
|
|
||||||
- name: Enable postgis and pg_trgm extensions in periplus database
|
- name: Enable postgis and pg_trgm extensions in periplus database
|
||||||
community.postgresql.postgresql_ext:
|
community.postgresql.postgresql_ext:
|
||||||
@@ -251,6 +253,7 @@
|
|||||||
- "{{ openwebui_db_name }}"
|
- "{{ openwebui_db_name }}"
|
||||||
- "{{ spelunker_db_name }}"
|
- "{{ spelunker_db_name }}"
|
||||||
- "{{ anythingllm_db_name }}"
|
- "{{ anythingllm_db_name }}"
|
||||||
|
- "{{ daedalus_db_name }}"
|
||||||
|
|
||||||
handlers:
|
handlers:
|
||||||
- name: restart postgresql
|
- name: restart postgresql
|
||||||
|
|||||||
@@ -244,6 +244,74 @@ groups:
|
|||||||
summary: "High log ingestion rate"
|
summary: "High log ingestion rate"
|
||||||
description: "Loki is receiving logs at {{ $value | humanize }}/s which may indicate excessive logging"
|
description: "Loki is receiving logs at {{ $value | humanize }}/s which may indicate excessive logging"
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Daedalus Application Alerts
|
||||||
|
# ============================================================================
|
||||||
|
- name: daedalus_alerts
|
||||||
|
rules:
|
||||||
|
- alert: DaedalusDown
|
||||||
|
expr: daedalus_up == 0
|
||||||
|
for: 1m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: "Daedalus is down"
|
||||||
|
description: "Daedalus has been unreachable for more than 1 minute."
|
||||||
|
|
||||||
|
- alert: DaedalusMCPDisconnected
|
||||||
|
expr: daedalus_mcp_connections_active == 0
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Daedalus has no active MCP connections"
|
||||||
|
description: "Daedalus has reported zero active MCP connections for 5 minutes."
|
||||||
|
|
||||||
|
- alert: DaedalusHighErrorRate
|
||||||
|
# Aggregate both sides with sum(): a plain series-by-series division only pairs each 5xx series with itself, so the ratio is always 1.
expr: sum(rate(daedalus_http_requests_total{status=~"5.."}[5m])) / sum(rate(daedalus_http_requests_total[5m])) > 0.05
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Daedalus HTTP 5xx error rate above 5%"
|
||||||
|
description: "Daedalus is returning HTTP 5xx errors at {{ $value | humanizePercentage }} of requests."
|
||||||
|
|
||||||
|
- alert: DaedalusClientExceptionSpike
|
||||||
|
# increase() over 1m yields exceptions per minute; rate() is per-second and would compare a per-second value against the per-minute threshold.
expr: increase(daedalus_client_exceptions_total[1m]) > 10
|
||||||
|
for: 1m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Daedalus client exception spike"
|
||||||
|
description: "Daedalus is recording more than 10 client exceptions per minute (current: {{ $value | printf \"%.1f\" }}/min)."
|
||||||
|
|
||||||
|
- alert: DaedalusSlowResponses
|
||||||
|
expr: histogram_quantile(0.95, rate(daedalus_http_request_duration_seconds_bucket[5m])) > 5
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Daedalus p95 response time above 5s"
|
||||||
|
description: "Daedalus p95 response latency is {{ $value | printf \"%.2f\" }}s."
|
||||||
|
|
||||||
|
- alert: DaedalusMCPLatency
|
||||||
|
expr: histogram_quantile(0.95, rate(daedalus_mcp_request_duration_seconds_bucket[5m])) > 30
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Daedalus MCP p95 latency above 30s"
|
||||||
|
description: "Daedalus MCP p95 latency is {{ $value | printf \"%.2f\" }}s."
|
||||||
|
|
||||||
|
- alert: DaedalusS3Errors
|
||||||
|
expr: rate(daedalus_s3_errors_total[5m]) / rate(daedalus_s3_requests_total[5m]) > 0.01
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Daedalus S3 error rate above 1%"
|
||||||
|
description: "Daedalus S3 error rate is {{ $value | humanizePercentage }} over the last 5 minutes."
|
||||||
|
|
||||||
# Red Panda Seal of Approval 🐼
|
# Red Panda Seal of Approval 🐼
|
||||||
# "If the metrics aren't red, go back to bed"
|
# "If the metrics aren't red, go back to bed"
|
||||||
{% endraw %}
|
{% endraw %}
|
||||||
|
|||||||
@@ -45,4 +45,10 @@ scrape_configs:
|
|||||||
accessKey: ['{{ casdoor_prometheus_access_key }}']
|
accessKey: ['{{ casdoor_prometheus_access_key }}']
|
||||||
accessSecret: ['{{ casdoor_prometheus_access_secret }}']
|
accessSecret: ['{{ casdoor_prometheus_access_secret }}']
|
||||||
|
|
||||||
|
- job_name: 'daedalus'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['puck.incus:22181']
|
||||||
|
metrics_path: '/metrics'
|
||||||
|
scrape_interval: 15s
|
||||||
|
|
||||||
# Red Panda Approved Prometheus Configuration
|
# Red Panda Approved Prometheus Configuration
|
||||||
|
|||||||
@@ -244,6 +244,74 @@ groups:
|
|||||||
summary: "High log ingestion rate"
|
summary: "High log ingestion rate"
|
||||||
description: "Loki is receiving logs at {{ $value | humanize }}/s which may indicate excessive logging"
|
description: "Loki is receiving logs at {{ $value | humanize }}/s which may indicate excessive logging"
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Daedalus Application Alerts
|
||||||
|
# ============================================================================
|
||||||
|
- name: daedalus_alerts
|
||||||
|
rules:
|
||||||
|
- alert: DaedalusDown
|
||||||
|
expr: daedalus_up == 0
|
||||||
|
for: 1m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: "Daedalus is down"
|
||||||
|
description: "Daedalus has been unreachable for more than 1 minute."
|
||||||
|
|
||||||
|
- alert: DaedalusMCPDisconnected
|
||||||
|
expr: daedalus_mcp_connections_active == 0
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Daedalus has no active MCP connections"
|
||||||
|
description: "Daedalus has reported zero active MCP connections for 5 minutes."
|
||||||
|
|
||||||
|
- alert: DaedalusHighErrorRate
|
||||||
|
# Aggregate both sides with sum(): a plain series-by-series division only pairs each 5xx series with itself, so the ratio is always 1.
expr: sum(rate(daedalus_http_requests_total{status=~"5.."}[5m])) / sum(rate(daedalus_http_requests_total[5m])) > 0.05
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Daedalus HTTP 5xx error rate above 5%"
|
||||||
|
description: "Daedalus is returning HTTP 5xx errors at {{ $value | humanizePercentage }} of requests."
|
||||||
|
|
||||||
|
- alert: DaedalusClientExceptionSpike
|
||||||
|
# increase() over 1m yields exceptions per minute; rate() is per-second and would compare a per-second value against the per-minute threshold.
expr: increase(daedalus_client_exceptions_total[1m]) > 10
|
||||||
|
for: 1m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Daedalus client exception spike"
|
||||||
|
description: "Daedalus is recording more than 10 client exceptions per minute (current: {{ $value | printf \"%.1f\" }}/min)."
|
||||||
|
|
||||||
|
- alert: DaedalusSlowResponses
|
||||||
|
expr: histogram_quantile(0.95, rate(daedalus_http_request_duration_seconds_bucket[5m])) > 5
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Daedalus p95 response time above 5s"
|
||||||
|
description: "Daedalus p95 response latency is {{ $value | printf \"%.2f\" }}s."
|
||||||
|
|
||||||
|
- alert: DaedalusMCPLatency
|
||||||
|
expr: histogram_quantile(0.95, rate(daedalus_mcp_request_duration_seconds_bucket[5m])) > 30
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Daedalus MCP p95 latency above 30s"
|
||||||
|
description: "Daedalus MCP p95 latency is {{ $value | printf \"%.2f\" }}s."
|
||||||
|
|
||||||
|
- alert: DaedalusS3Errors
|
||||||
|
expr: rate(daedalus_s3_errors_total[5m]) / rate(daedalus_s3_requests_total[5m]) > 0.01
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Daedalus S3 error rate above 1%"
|
||||||
|
description: "Daedalus S3 error rate is {{ $value | humanizePercentage }} over the last 5 minutes."
|
||||||
|
|
||||||
# Red Panda Seal of Approval 🐼
|
# Red Panda Seal of Approval 🐼
|
||||||
# "If the metrics aren't red, go back to bed"
|
# "If the metrics aren't red, go back to bed"
|
||||||
{% endraw %}
|
{% endraw %}
|
||||||
|
|||||||
@@ -45,4 +45,10 @@ scrape_configs:
|
|||||||
accessKey: ['{{ casdoor_prometheus_access_key }}']
|
accessKey: ['{{ casdoor_prometheus_access_key }}']
|
||||||
accessSecret: ['{{ casdoor_prometheus_access_secret }}']
|
accessSecret: ['{{ casdoor_prometheus_access_secret }}']
|
||||||
|
|
||||||
|
- job_name: 'daedalus'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['puck.incus:22181']
|
||||||
|
metrics_path: '/metrics'
|
||||||
|
scrape_interval: 15s
|
||||||
|
|
||||||
# Red Panda Approved Prometheus Configuration
|
# Red Panda Approved Prometheus Configuration
|
||||||
|
|||||||
244
docs/daedalus.md
Normal file
244
docs/daedalus.md
Normal file
@@ -0,0 +1,244 @@
|
|||||||
|
# Daedalus — Deployment Requirements
|
||||||
|
|
||||||
|
All infrastructure runs within the Agathos Incus sandbox. Hosts are resolved via DNS using the `.incus` suffix.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. HAProxy — Titania
|
||||||
|
|
||||||
|
**Host:** `titania.incus`
|
||||||
|
**Domain:** `daedalus.ouranos.helu.ca`
|
||||||
|
|
||||||
|
HAProxy on Titania terminates TLS and routes traffic to Daedalus on puck. Casdoor SSO enforces authentication before requests reach the backend.
|
||||||
|
|
||||||
|
```haproxy
|
||||||
|
frontend https
|
||||||
|
acl host_daedalus hdr(host) -i daedalus.ouranos.helu.ca
|
||||||
|
use_backend daedalus if host_daedalus
|
||||||
|
|
||||||
|
backend daedalus
|
||||||
|
option httpchk GET /api/health
|
||||||
|
server puck puck.incus:22181 check
|
||||||
|
```
|
||||||
|
|
||||||
|
**Requirements:**
|
||||||
|
- ACL entry in the HAProxy `frontend https` block
|
||||||
|
- Backend definition with health check on `/api/health`
|
||||||
|
- Casdoor application configured for `daedalus.ouranos.helu.ca` (same pattern as other Agathos services)
|
||||||
|
- TLS certificate covering `daedalus.ouranos.helu.ca` (wildcard or SAN)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. PostgreSQL — Portia
|
||||||
|
|
||||||
|
**Host:** `portia.incus`
|
||||||
|
**Port:** 5432
|
||||||
|
**Database:** `daedalus`
|
||||||
|
|
||||||
|
Stores conversation history, workspace configuration, user preferences, and file metadata (S3 keys).
|
||||||
|
|
||||||
|
**Provisioning:**
|
||||||
|
```sql
|
||||||
|
CREATE USER daedalus WITH PASSWORD '<from Ansible Vault>';
|
||||||
|
CREATE DATABASE daedalus OWNER daedalus;
|
||||||
|
```
|
||||||
|
|
||||||
|
**Connection string (backend `.env`):**
|
||||||
|
```
|
||||||
|
DAEDALUS_DATABASE_URL=postgresql+asyncpg://daedalus:<password>@portia.incus:5432/daedalus
|
||||||
|
```
|
||||||
|
|
||||||
|
**Schema management:** Alembic migrations run from the backend container/service on puck. No direct DDL.
|
||||||
|
|
||||||
|
**Tables (created by Alembic):**
|
||||||
|
- Workspaces
|
||||||
|
- Conversations / messages
|
||||||
|
- File metadata (S3 keys, filenames, content types, sizes)
|
||||||
|
- User preferences
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. Prometheus Scraping — Prospero
|
||||||
|
|
||||||
|
**Scraper:** `prospero.incus`
|
||||||
|
**Target:** `puck.incus:22181/metrics`
|
||||||
|
|
||||||
|
Daedalus exposes a `/metrics` endpoint in Prometheus text format. Prospero's Prometheus must be configured to scrape it.
|
||||||
|
|
||||||
|
**Prometheus scrape config:**
|
||||||
|
```yaml
|
||||||
|
- job_name: daedalus
|
||||||
|
scrape_interval: 15s
|
||||||
|
metrics_path: /metrics
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- puck.incus:22181
|
||||||
|
```
|
||||||
|
|
||||||
|
**Key metric families:**
|
||||||
|
| Prefix | Category |
|
||||||
|
|--------|----------|
|
||||||
|
| `daedalus_up`, `daedalus_build_info` | Application health |
|
||||||
|
| `daedalus_http_requests_total`, `daedalus_http_request_duration_seconds` | HTTP traffic |
|
||||||
|
| `daedalus_mcp_*` | MCP connection and request metrics |
|
||||||
|
| `daedalus_agent_*` | Agent interaction metrics |
|
||||||
|
| `daedalus_file_*`, `daedalus_s3_*` | File and S3 operations |
|
||||||
|
| `daedalus_client_*` | Browser telemetry (exceptions, Web Vitals) |
|
||||||
|
| `daedalus_feature_usage_total` | Feature usage counters |
|
||||||
|
|
||||||
|
**Network requirement:** The `/metrics` endpoint is restricted to internal networks (`10.10.0.0/24`, `172.16.0.0/12`, `127.0.0.0/8`) in the Nginx config.
|
||||||
|
|
||||||
|
**Alert rules** (Prometheus rule group `daedalus_alerts` on Prospero):
|
||||||
|
| Alert | Condition | Severity |
|
||||||
|
|-------|-----------|----------|
|
||||||
|
| `DaedalusDown` | `daedalus_up == 0` for 1m | critical |
|
||||||
|
| `DaedalusMCPDisconnected` | `daedalus_mcp_connections_active == 0` for 5m | warning |
|
||||||
|
| `DaedalusHighErrorRate` | HTTP 5xx > 5% for 5m | warning |
|
||||||
|
| `DaedalusClientExceptionSpike` | Client exceptions > 10/min for 1m | warning |
|
||||||
|
| `DaedalusSlowResponses` | p95 > 5s for 5m | warning |
|
||||||
|
| `DaedalusMCPLatency` | MCP p95 > 30s for 5m | warning |
|
||||||
|
| `DaedalusS3Errors` | S3 error rate > 1% for 5m | warning |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. S3 Object Storage — MinIO
|
||||||
|
|
||||||
|
**Provider:** MinIO on Incus (provisioned by Terraform)
|
||||||
|
**Bucket:** `daedalus`
|
||||||
|
|
||||||
|
Stores workspace file uploads. Metadata lives in PostgreSQL; actual bytes live in S3.
|
||||||
|
|
||||||
|
**Key layout:**
|
||||||
|
```
|
||||||
|
workspaces/{workspace_id}/files/{file_id}/{filename}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Backend environment variables:**
|
||||||
|
```
|
||||||
|
DAEDALUS_S3_ENDPOINT=http://<minio-host>:9000
|
||||||
|
DAEDALUS_S3_ACCESS_KEY=<from Ansible Vault>
|
||||||
|
DAEDALUS_S3_SECRET_KEY=<from Ansible Vault>
|
||||||
|
DAEDALUS_S3_BUCKET=daedalus
|
||||||
|
```
|
||||||
|
|
||||||
|
**Requirements:**
|
||||||
|
- Terraform resource for the bucket (same pattern as Casdoor and LobeChat S3 buckets)
|
||||||
|
- Access key / secret key stored in Ansible Vault
|
||||||
|
- Credentials never exposed to the frontend — all file access flows through FastAPI
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. Application Runtime — Puck
|
||||||
|
|
||||||
|
**Host:** `puck.incus`
|
||||||
|
**Nginx port:** 22181 (proxied by HAProxy on Titania)
|
||||||
|
**Uvicorn port:** 8000 (internal only, behind Nginx)
|
||||||
|
|
||||||
|
### Production Deployment
|
||||||
|
|
||||||
|
**Systemd service** (`/etc/systemd/system/daedalus-api.service`):
|
||||||
|
```ini
|
||||||
|
[Unit]
|
||||||
|
Description=Daedalus API (FastAPI)
|
||||||
|
After=network.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
User=daedalus
|
||||||
|
WorkingDirectory=/srv/daedalus/backend
|
||||||
|
ExecStart=/srv/daedalus/venv/bin/uvicorn daedalus.main:app --host 127.0.0.1 --port 8000 --workers 2
|
||||||
|
Restart=always
|
||||||
|
RestartSec=5
|
||||||
|
Environment=DAEDALUS_ENV=production
|
||||||
|
StandardOutput=journal
|
||||||
|
StandardError=journal
|
||||||
|
SyslogIdentifier=daedalus
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
|
```
|
||||||
|
|
||||||
|
**Nginx** serves the SvelteKit static build from `/srv/daedalus/frontend/build` and proxies `/api/*` and `/metrics` to Uvicorn on `127.0.0.1:8000`.
|
||||||
|
|
||||||
|
**Directory layout (production):**
|
||||||
|
```
|
||||||
|
/srv/daedalus/
|
||||||
|
├── backend/ # Python source
|
||||||
|
├── frontend/build/ # Static SPA build
|
||||||
|
├── venv/ # Python virtualenv
|
||||||
|
└── .env # Environment configuration
|
||||||
|
```
|
||||||
|
|
||||||
|
### Docker Compose (Development)
|
||||||
|
|
||||||
|
For local development on puck, two containers run behind Docker bridge networking:
|
||||||
|
|
||||||
|
| Service | Image | Port |
|
||||||
|
|---------|-------|------|
|
||||||
|
| `api` | Built from `./backend/Dockerfile` | 8000 (internal) |
|
||||||
|
| `nginx` | `nginx:alpine` | 22181 → 80 |
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up --build
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. Logging — Alloy → Loki
|
||||||
|
|
||||||
|
**No log files.** Structured JSON goes to stdout.
|
||||||
|
|
||||||
|
| Environment | Log path |
|
||||||
|
|-------------|----------|
|
||||||
|
| Production (systemd) | stdout → journal → syslog → Alloy → Loki (prospero) |
|
||||||
|
| Development (Docker) | stdout → Docker log driver → Alloy → Loki (prospero) |
|
||||||
|
|
||||||
|
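Alloy on puck is already configured to ship container and journal logs to Loki on Prospero. The `SyslogIdentifier=daedalus` tag identifies the service's entries, and the dedicated `daedalus_logs` syslog listener in Alloy labels them with `job="daedalus"`, which allows filtering in Grafana with `{job="daedalus"}`.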
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. LLM Proxy — Sycorax
|
||||||
|
|
||||||
|
**Host:** `sycorax.incus`
|
||||||
|
|
||||||
|
FastAgent MCP servers route LLM API calls through Arke on Sycorax for multi-provider model routing (OpenAI, Anthropic, etc.). Daedalus does not call Sycorax directly — it communicates with FastAgent servers over MCP Streamable HTTP, and those agents use Sycorax.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. DNS Summary
|
||||||
|
|
||||||
|
| FQDN | Resolves to | Purpose |
|
||||||
|
|------|-------------|---------|
|
||||||
|
| `daedalus.ouranos.helu.ca` | Titania (HAProxy) | Public entry point |
|
||||||
|
| `puck.incus` | Puck | Application host (Nginx + Uvicorn) |
|
||||||
|
| `portia.incus` | Portia | PostgreSQL |
|
||||||
|
| `prospero.incus` | Prospero | Prometheus, Loki, Grafana |
|
||||||
|
| `titania.incus` | Titania | HAProxy + Casdoor SSO |
|
||||||
|
| `sycorax.incus` | Sycorax | LLM proxy (Arke) |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. Deployment Checklist
|
||||||
|
|
||||||
|
### Infrastructure (Terraform / Ansible)
|
||||||
|
- [ ] Create `daedalus` database and user on Portia
|
||||||
|
- [ ] Create `daedalus` S3 bucket in MinIO (Terraform)
|
||||||
|
- [ ] Store DB password and S3 credentials in Ansible Vault
|
||||||
|
- [ ] Add Prometheus scrape target on Prospero
|
||||||
|
- [ ] Add Prometheus alert rules on Prospero
|
||||||
|
- [ ] Add Grafana dashboard on Prospero
|
||||||
|
- [ ] Configure HAProxy backend + ACL on Titania
|
||||||
|
- [ ] Configure Casdoor application for `daedalus.ouranos.helu.ca`
|
||||||
|
|
||||||
|
### Application (Puck)
|
||||||
|
- [ ] Create `/srv/daedalus` directory structure
|
||||||
|
- [ ] Create `daedalus` system user
|
||||||
|
- [ ] Set up Python virtualenv and install backend dependencies
|
||||||
|
- [ ] Build SvelteKit frontend (`npm run build`)
|
||||||
|
- [ ] Deploy `.env` from Ansible Vault
|
||||||
|
- [ ] Install Nginx site config
|
||||||
|
- [ ] Install and enable systemd service
|
||||||
|
- [ ] Run Alembic migrations (`alembic upgrade head`)
|
||||||
|
- [ ] Verify `/api/health` returns `{"status": "ok"}`
|
||||||
|
- [ ] Verify `/metrics` is reachable from Prospero
|
||||||
|
- [ ] Verify `daedalus.ouranos.helu.ca` loads the SPA through HAProxy
|
||||||
@@ -63,6 +63,23 @@ resource "incus_storage_bucket_key" "spelunker_key" {
|
|||||||
role = "admin"
|
role = "admin"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# S3 bucket for Daedalus file storage
|
||||||
|
resource "incus_storage_bucket" "daedalus" {
|
||||||
|
name = "daedalus"
|
||||||
|
pool = var.storage_pool
|
||||||
|
project = var.project_name
|
||||||
|
description = "Daedalus file storage bucket"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Access key for Daedalus S3 bucket
|
||||||
|
resource "incus_storage_bucket_key" "daedalus_key" {
|
||||||
|
name = "daedalus-access"
|
||||||
|
pool = incus_storage_bucket.daedalus.pool
|
||||||
|
storage_bucket = incus_storage_bucket.daedalus.name
|
||||||
|
project = var.project_name
|
||||||
|
role = "admin"
|
||||||
|
}
|
||||||
|
|
||||||
# Outputs for S3 credentials (to be stored in Ansible vault)
|
# Outputs for S3 credentials (to be stored in Ansible vault)
|
||||||
output "lobechat_s3_credentials" {
|
output "lobechat_s3_credentials" {
|
||||||
description = "Lobechat S3 bucket credentials - store in vault as vault_lobechat_s3_*"
|
description = "Lobechat S3 bucket credentials - store in vault as vault_lobechat_s3_*"
|
||||||
@@ -96,3 +113,14 @@ output "spelunker_s3_credentials" {
|
|||||||
}
|
}
|
||||||
sensitive = true
|
sensitive = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
output "daedalus_s3_credentials" {
|
||||||
|
description = "Daedalus S3 bucket credentials - store in vault as vault_daedalus_s3_*"
|
||||||
|
value = {
|
||||||
|
bucket = incus_storage_bucket.daedalus.name
|
||||||
|
access_key = incus_storage_bucket_key.daedalus_key.access_key
|
||||||
|
secret_key = incus_storage_bucket_key.daedalus_key.secret_key
|
||||||
|
endpoint = "https://${incus_storage_bucket.daedalus.location}"
|
||||||
|
}
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user