feat: add Daedalus application configuration, database setup, and monitoring alerts

This commit is contained in:
2026-03-12 02:16:49 +00:00
parent 540990ef74
commit 67b32b8399
12 changed files with 481 additions and 0 deletions

View File

@@ -244,6 +244,74 @@ groups:
summary: "High log ingestion rate"
description: "Loki is receiving logs at {{ $value | humanize }}/s which may indicate excessive logging"
# ============================================================================
# Daedalus Application Alerts
# ============================================================================
# Rule group containing the alerting rules for the Daedalus application.
# No per-group `interval` is set in this header.
# NOTE(review): presumably the global evaluation interval applies — confirm.
- name: daedalus_alerts
rules:
# Critical alert: Daedalus is down, either because Prometheus cannot scrape it
# or because the application itself reports daedalus_up == 0.
- alert: DaedalusDown
  # `up` is the synthetic series Prometheus writes for every scrape attempt and
  # is the only signal left when the process is unreachable. An app-exported
  # gauge such as daedalus_up goes stale once scrapes fail, so
  # `daedalus_up == 0` alone returns no series in exactly that case.
  # NOTE(review): assumes the scrape job is named "daedalus" - confirm against
  # the Prometheus scrape configuration.
  expr: 'up{job="daedalus"} == 0 or daedalus_up == 0'
  # Condition must hold for a full minute before the alert fires.
  for: 1m
  labels:
    severity: critical
  annotations:
    summary: "Daedalus is down"
    description: "Daedalus has been unreachable for more than 1 minute."
# Warning: the daedalus_mcp_connections_active gauge has stayed at zero for
# five minutes.
- alert: DaedalusMCPDisconnected
  expr: 'daedalus_mcp_connections_active == 0'
  for: 5m
  labels:
    severity: warning
  annotations:
    summary: 'Daedalus has no active MCP connections'
    description: >-
      Daedalus has reported zero active MCP connections
      for 5 minutes.
# Warning: more than 5% of all Daedalus HTTP requests over the last 5 minutes
# were answered with a 5xx status.
- alert: DaedalusHighErrorRate
  # Both sides are aggregated with sum() before dividing. Without it, the
  # division matches series one-to-one on identical label sets, so every 5xx
  # series is divided only by itself (ratio 1) rather than by total traffic,
  # and the alert fires on any sustained 5xx at all.
  # With no traffic the result is 0/0 = NaN, which does not satisfy `> 0.05`.
  expr: >-
    sum(rate(daedalus_http_requests_total{status=~"5.."}[5m]))
    / sum(rate(daedalus_http_requests_total[5m])) > 0.05
  for: 5m
  labels:
    severity: warning
  annotations:
    summary: "Daedalus HTTP 5xx error rate above 5%"
    description: "Daedalus is returning HTTP 5xx errors at {{ $value | humanizePercentage }} of requests."
# Warning: Daedalus records more than 10 client exceptions per minute,
# sustained for one minute.
- alert: DaedalusClientExceptionSpike
  # rate() yields a per-second value. Multiplying by 60 converts it to a
  # per-minute figure so that both the threshold and $value agree with the
  # "/min" wording in the description (the bare rate would only fire above
  # 10 exceptions per second, i.e. 600 per minute).
  expr: 'rate(daedalus_client_exceptions_total[1m]) * 60 > 10'
  for: 1m
  labels:
    severity: warning
  annotations:
    summary: "Daedalus client exception spike"
    description: "Daedalus is recording more than 10 client exceptions per minute (current: {{ $value | printf \"%.1f\" }}/min)."
# Warning: the 95th-percentile HTTP request duration, estimated from the
# daedalus_http_request_duration_seconds histogram over 5 minutes, exceeds 5s.
- alert: DaedalusSlowResponses
  # The buckets are not aggregated, so the quantile is computed separately for
  # each label combination of the histogram (everything except `le`).
  expr: 'histogram_quantile(0.95, rate(daedalus_http_request_duration_seconds_bucket[5m])) > 5'
  for: 5m
  labels:
    severity: warning
  annotations:
    summary: 'Daedalus p95 response time above 5s'
    description: 'Daedalus p95 response latency is {{ $value | printf "%.2f" }}s.'
# Warning: the 95th-percentile MCP request duration, estimated from the
# daedalus_mcp_request_duration_seconds histogram over 5 minutes, exceeds 30s.
- alert: DaedalusMCPLatency
  # The buckets are not aggregated, so the quantile is computed separately for
  # each label combination of the histogram (everything except `le`).
  expr: 'histogram_quantile(0.95, rate(daedalus_mcp_request_duration_seconds_bucket[5m])) > 30'
  for: 5m
  labels:
    severity: warning
  annotations:
    summary: 'Daedalus MCP p95 latency above 30s'
    description: 'Daedalus MCP p95 latency is {{ $value | printf "%.2f" }}s.'
# Warning: the ratio of S3 errors to S3 requests over the last 5 minutes
# exceeds 1%.
- alert: DaedalusS3Errors
  # NOTE(review): the division pairs series with identical label sets; this
  # only yields results if daedalus_s3_errors_total and
  # daedalus_s3_requests_total carry the same labels - confirm.
  expr: 'rate(daedalus_s3_errors_total[5m]) / rate(daedalus_s3_requests_total[5m]) > 0.01'
  for: 5m
  labels:
    severity: warning
  annotations:
    summary: 'Daedalus S3 error rate above 1%'
    description: >-
      Daedalus S3 error rate is {{ $value | humanizePercentage }}
      over the last 5 minutes.
# Red Panda Seal of Approval 🐼
# "If the metrics aren't red, go back to bed"
{% endraw %}

View File

@@ -45,4 +45,10 @@ scrape_configs:
accessKey: ['{{ casdoor_prometheus_access_key }}']
accessSecret: ['{{ casdoor_prometheus_access_secret }}']
# Scrape job for Daedalus: pulls /metrics from puck.incus:22181 every 15s.
- job_name: 'daedalus'
  metrics_path: '/metrics'
  scrape_interval: 15s
  static_configs:
    - targets:
        - 'puck.incus:22181'
# Red Panda Approved Prometheus Configuration