feat(observability): add SearXNG, Argos, and Pallas monitoring
- Add SearXNG syslog ingestion and blackbox health probes on miranda and rosalind for per-host attributable failure detection
- Scrape Argos MCP application metrics from miranda
- Add Pallas dashboard panels for downstream availability and turn error ratios
This commit is contained in:
@@ -93,6 +93,20 @@ loki.source.syslog "gitea_mcp_logs" {
|
||||
forward_to = [loki.write.default.receiver]
|
||||
}
|
||||
|
||||
// SearXNG log ingestion: accept syslog over TCP on a loopback-only listener
// and hand every entry to the default loki.write receiver. Each entry is
// tagged with the job name, this host's inventory name, and the deployment
// environment.
loki.source.syslog "searxng_logs" {
  forward_to = [loki.write.default.receiver]

  listener {
    protocol      = "tcp"
    address       = "127.0.0.1:{{searxng_syslog_port}}"
    syslog_format = "{{ syslog_format }}"

    labels = {
      environment = "{{deployment_environment}}",
      hostname    = "{{inventory_hostname}}",
      job         = "searxng",
    }
  }
}
|
||||
|
||||
prometheus.exporter.unix "default" {
|
||||
include_exporter_metrics = true
|
||||
disable_collectors = ["mdadm"]
|
||||
@@ -104,6 +118,45 @@ prometheus.scrape "default" {
|
||||
job_name = "mcp_docker_host"
|
||||
}
|
||||
|
||||
// Application metrics for Argos MCP. Argos serves /metrics itself (see
// argos/argos_searxng/metrics.py); it is scraped over loopback every 30s and
// the samples are forwarded to the default remote_write receiver.
prometheus.scrape "argos" {
  metrics_path    = "/metrics"
  scrape_interval = "30s"
  forward_to      = [prometheus.remote_write.default.receiver]

  // Single static target; the job/instance labels are set explicitly here.
  targets = [{
    __address__ = "127.0.0.1:{{argos_port}}",
    environment = "{{deployment_environment}}",
    hostname    = "{{inventory_hostname}}",
    instance    = "{{inventory_hostname}}",
    job         = "argos",
  }]
}
|
||||
|
||||
// Probe this host's SearXNG /healthz endpoint independently of Argos.
// Argos's own per-instance gauge can lie, because Argos itself could be sick.
prometheus.exporter.blackbox "searxng" {
  // One HTTP module: 5s probe timeout, and only a 200 response counts as success.
  config = "{ modules: { http_2xx: { prober: http, timeout: 5s, http: { valid_status_codes: [200] } } } }"

  target {
    name    = "{{inventory_hostname}}"
    module  = "http_2xx"
    address = "http://127.0.0.1:{{searxng_port}}/healthz"

    labels = {
      environment = "{{deployment_environment}}",
      hostname    = "{{inventory_hostname}}",
      service     = "searxng",
    }
  }
}
|
||||
|
||||
// Scrape the SearXNG blackbox exporter's targets every 30s and forward the
// probe results to the default remote_write receiver.
prometheus.scrape "searxng_blackbox" {
  job_name        = "searxng_blackbox"
  scrape_interval = "30s"
  targets         = prometheus.exporter.blackbox.searxng.targets
  forward_to      = [prometheus.remote_write.default.receiver]
}
|
||||
|
||||
prometheus.remote_write "default" {
|
||||
endpoint {
|
||||
url = "{{prometheus_remote_write_url}}"
|
||||
|
||||
@@ -190,6 +190,31 @@ prometheus.scrape "gitea" {
|
||||
bearer_token = "{{gitea_metrics_token}}"
|
||||
}
|
||||
|
||||
// Probe this host's SearXNG /healthz endpoint independently of Argos.
// Argos (on miranda) load-balances across this instance and miranda's own,
// so each host's Alloy probes its local SearXNG to keep failures attributable
// to a specific host.
prometheus.exporter.blackbox "searxng" {
  // One HTTP module: 5s probe timeout, and only a 200 response counts as success.
  config = "{ modules: { http_2xx: { prober: http, timeout: 5s, http: { valid_status_codes: [200] } } } }"

  target {
    name    = "{{inventory_hostname}}"
    module  = "http_2xx"
    address = "http://127.0.0.1:{{searxng_port}}/healthz"

    labels = {
      environment = "{{deployment_environment}}",
      hostname    = "{{inventory_hostname}}",
      service     = "searxng",
    }
  }
}
|
||||
|
||||
// Scrape the SearXNG blackbox exporter's targets every 30s and forward the
// probe results to the default remote_write receiver.
prometheus.scrape "searxng_blackbox" {
  job_name        = "searxng_blackbox"
  scrape_interval = "30s"
  targets         = prometheus.exporter.blackbox.searxng.targets
  forward_to      = [prometheus.remote_write.default.receiver]
}
|
||||
|
||||
// Prometheus remote write endpoint
|
||||
prometheus.remote_write "default" {
|
||||
endpoint {
|
||||
|
||||
Reference in New Issue
Block a user