Files
ouranos/ansible/alloy/rosalind/config.alloy.j2
Robert Helewka 3c2f8c57ca feat(observability): add SearXNG, Argos, and Pallas monitoring
- Add SearXNG syslog ingestion and blackbox health probes on miranda
  and rosalind for per-host attributable failure detection
- Scrape Argos MCP application metrics from miranda
- Add Pallas dashboard panels for downstream availability and turn
  error ratios
2026-05-24 23:52:53 -04:00

224 lines
5.8 KiB
Django/Jinja

// Rosalind Alloy Configuration
// Services monitored: Gitea, Lobechat, Nextcloud (Apache), Jellyfin, SearXNG, hass
// Alloy's own log output: the level is supplied by the Ansible variable
// alloy_log_level and lines are written in logfmt.
logging {
  format = "logfmt"
  level  = "{{alloy_log_level}}"
}
// ============================================================================
// LOG COLLECTION - Loki Forwarding
// ============================================================================
// System log files (syslog and auth.log) tailed from disk.
// Each target carries hostname and environment labels, the same pair the
// journal and syslog sources in this file attach, so these streams can be
// attributed to this host in Loki (loki.write adds no external labels).
loki.source.file "system_logs" {
  targets = [
    {
      __path__    = "/var/log/syslog",
      job         = "syslog",
      hostname    = "{{inventory_hostname}}",
      environment = "{{deployment_environment}}",
    },
    {
      __path__    = "/var/log/auth.log",
      job         = "auth",
      hostname    = "{{inventory_hostname}}",
      environment = "{{deployment_environment}}",
    },
  ]
  forward_to = [loki.write.default.receiver]
}
// Systemd journal entries, labelled with this host and environment and
// forwarded to the default Loki writer.
// NOTE(review): the earlier comment said the journal includes the AnythingLLM
// server/collector units; nothing else in this file references AnythingLLM,
// so confirm it actually runs on this host.
loki.source.journal "systemd_logs" {
  labels = {
    job         = "systemd",
    hostname    = "{{inventory_hostname}}",
    environment = "{{deployment_environment}}",
  }

  forward_to = [loki.write.default.receiver]
}
// Gitea application log file.
// The target carries hostname and environment labels, the same pair the
// journal and syslog sources in this file attach, so the stream can be
// attributed to this host in Loki.
loki.source.file "gitea_logs" {
  targets = [
    {
      __path__    = "/var/log/gitea/gitea.log",
      job         = "gitea",
      hostname    = "{{inventory_hostname}}",
      environment = "{{deployment_environment}}",
    },
  ]
  forward_to = [loki.write.default.receiver]
}
// Apache access and error logs (Nextcloud).
// Both targets carry hostname and environment labels, the same pair the
// journal and syslog sources in this file attach, so the streams can be
// attributed to this host in Loki.
loki.source.file "apache_logs" {
  targets = [
    {
      __path__    = "/var/log/apache2/access.log",
      job         = "apache_access",
      hostname    = "{{inventory_hostname}}",
      environment = "{{deployment_environment}}",
    },
    {
      __path__    = "/var/log/apache2/error.log",
      job         = "apache_error",
      hostname    = "{{inventory_hostname}}",
      environment = "{{deployment_environment}}",
    },
  ]
  forward_to = [loki.write.default.receiver]
}
// hass metrics scrape (presumably Home Assistant, given the /api/prometheus
// path; confirm). Scrapes the local instance every 60s using a bearer token
// and forwards samples to the default remote_write component.
prometheus.scrape "hass" {
  scrape_interval = "60s"
  metrics_path    = "/api/prometheus"
  bearer_token    = "{{hass_metrics_token}}"

  targets = [
    {
      __address__ = "127.0.0.1:{{hass_port}}",
      job         = "hass",
      hostname    = "{{inventory_hostname}}",
      environment = "{{deployment_environment}}",
    },
  ]

  forward_to = [prometheus.remote_write.default.receiver]
}
// Lobechat Docker syslog: a loopback-only TCP listener that labels incoming
// messages with job, hostname and environment before forwarding to Loki.
loki.source.syslog "lobechat_logs" {
  forward_to = [loki.write.default.receiver]

  listener {
    protocol      = "tcp"
    address       = "127.0.0.1:{{ lobechat_syslog_port }}"
    syslog_format = "{{ syslog_format }}"
    labels        = {
      job         = "lobechat",
      hostname    = "{{inventory_hostname}}",
      environment = "{{deployment_environment}}",
    }
  }
}
// Jellyfin Docker syslog: a loopback-only TCP listener that labels incoming
// messages with job, hostname and environment before forwarding to Loki.
loki.source.syslog "jellyfin_logs" {
  forward_to = [loki.write.default.receiver]

  listener {
    protocol      = "tcp"
    address       = "127.0.0.1:{{ jellyfin_syslog_port }}"
    syslog_format = "{{ syslog_format }}"
    labels        = {
      job         = "jellyfin",
      hostname    = "{{inventory_hostname}}",
      environment = "{{deployment_environment}}",
    }
  }
}
// SearXNG syslog: a loopback-only TCP listener that labels incoming messages
// with job, hostname and environment before forwarding to Loki.
loki.source.syslog "searxng_logs" {
  forward_to = [loki.write.default.receiver]

  listener {
    protocol      = "tcp"
    address       = "127.0.0.1:{{searxng_syslog_port}}"
    syslog_format = "{{ syslog_format }}"
    labels        = {
      job         = "searxng",
      hostname    = "{{inventory_hostname}}",
      environment = "{{deployment_environment}}",
    }
  }
}
// Loki push endpoint shared by every log source in this file; the URL is
// supplied by the Ansible variable loki_url.
loki.write "default" {
  endpoint {
    url = "{{loki_url}}"
  }
}
// ============================================================================
// METRICS COLLECTION - Prometheus Remote Write
// ============================================================================
// Unix/Node host metrics. The collectors listed below are switched off,
// leaving what the original note describes as the Incus-safe set.
// The exporter's own metrics are included as well.
prometheus.exporter.unix "default" {
  include_exporter_metrics = true

  disable_collectors = [
    "arp", "bcache", "bonding", "btrfs", "hwmon",
    "infiniband", "ipvs", "mdadm", "nfs", "nfsd",
    "nvme", "powersupplyclass", "rapl", "thermal_zone", "zfs",
  ]
}
// Process exporter: one matcher whose cmdline pattern accepts any non-empty
// command line and whose group name is the process command name (the .Comm
// template, wrapped in raw tags so Jinja leaves it untouched).
prometheus.exporter.process "default" {
  matcher {
    cmdline = [".+"]
    name    = "{% raw %}{{.Comm}}{% endraw %}"
  }

  track_children    = true
  track_threads     = true
  recheck_on_scrape = true
  gather_smaps      = false
}
// cAdvisor: Docker container metrics read through the local Docker socket,
// with container labels stored. The original note says this was added for
// Lobechat.
prometheus.exporter.cadvisor "default" {
  docker_only            = true
  store_container_labels = true
  docker_host            = "unix:///var/run/docker.sock"
}
// Prometheus scrape configurations
// Scrape the unix exporter every 15s and hand samples to remote_write.
prometheus.scrape "unix" {
  scrape_interval = "15s"
  targets         = prometheus.exporter.unix.default.targets
  forward_to      = [prometheus.remote_write.default.receiver]
}
// Scrape the process exporter every 15s and hand samples to remote_write.
prometheus.scrape "process" {
  scrape_interval = "15s"
  targets         = prometheus.exporter.process.default.targets
  forward_to      = [prometheus.remote_write.default.receiver]
}
// Scrape the cAdvisor exporter every 15s and hand samples to remote_write.
prometheus.scrape "cadvisor" {
  scrape_interval = "15s"
  targets         = prometheus.exporter.cadvisor.default.targets
  forward_to      = [prometheus.remote_write.default.receiver]
}
// Gitea application metrics: scrapes /metrics on the local Gitea web port
// every 30s, authenticating with a bearer token.
prometheus.scrape "gitea" {
  scrape_interval = "30s"
  metrics_path    = "/metrics"
  bearer_token    = "{{gitea_metrics_token}}"

  targets = [
    {
      __address__ = "127.0.0.1:{{gitea_web_port}}",
      job         = "gitea",
      hostname    = "{{inventory_hostname}}",
      environment = "{{deployment_environment}}",
    },
  ]

  forward_to = [prometheus.remote_write.default.receiver]
}
// Blackbox HTTP probe of this host's SearXNG /healthz endpoint; only a 200
// response counts as healthy and the probe times out after 5s.
// Argos (on miranda) load-balances across this instance and miranda's own,
// so each host's Alloy probes its local SearXNG to keep failures attributable
// to a specific host.
prometheus.exporter.blackbox "searxng" {
  config = "{ modules: { http_2xx: { prober: http, timeout: 5s, http: { valid_status_codes: [200] } } } }"

  target {
    module  = "http_2xx"
    address = "http://127.0.0.1:{{searxng_port}}/healthz"
    name    = "{{inventory_hostname}}"
    labels  = {
      service     = "searxng",
      hostname    = "{{inventory_hostname}}",
      environment = "{{deployment_environment}}",
    }
  }
}
// Scrape the SearXNG blackbox probe results every 30s and hand them to
// remote_write.
prometheus.scrape "searxng_blackbox" {
  job_name        = "searxng_blackbox"
  scrape_interval = "30s"
  targets         = prometheus.exporter.blackbox.searxng.targets
  forward_to      = [prometheus.remote_write.default.receiver]
}
// Remote-write endpoint that receives samples from every scrape component in
// this file; the URL is supplied by the Ansible variable
// prometheus_remote_write_url.
prometheus.remote_write "default" {
  endpoint {
    url = "{{prometheus_remote_write_url}}"
  }
}