feat(observability): add SearXNG, Argos, and Pallas monitoring
- Add SearXNG syslog ingestion and blackbox health probes on miranda and rosalind for per-host attributable failure detection
- Scrape Argos MCP application metrics from miranda
- Add Pallas dashboard panels for downstream availability and turn error ratios
This commit is contained in:
@@ -51,28 +51,44 @@ scrape_configs:
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 15s
|
||||
|
||||
# Mnemosyne — single /metrics endpoint on the app container serves both
|
||||
# django-prometheus HTTP/Celery metrics and the MCP server's tool-call
|
||||
# counters (the mcp_server.metrics module registers into the same
|
||||
# prometheus_client process registry on the Django side). The mcp
|
||||
# container itself does not expose /metrics; they are served from the WSGI side.
|
||||
# Mnemosyne — app exposes /metrics on the Django container (proxied via
|
||||
# nginx); a single prometheus_client process registry serves both
|
||||
# django-prometheus (HTTP/Celery) and the MCP server's tool-call counters
|
||||
# (the mcp container itself does not expose /metrics). The web target is an
|
||||
# nginx-prometheus-exporter sidecar that scrapes the web container's
|
||||
# stub_status and re-exposes it in Prometheus format.
|
||||
- job_name: 'mnemosyne'
|
||||
static_configs:
|
||||
- targets: ['{{ mnemosyne_metrics_host }}:{{ mnemosyne_metrics_port }}']
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 15s
|
||||
static_configs:
|
||||
- targets: ['{{ mnemosyne_app_metrics_host }}:{{ mnemosyne_app_metrics_port }}']
|
||||
labels:
|
||||
component: app
|
||||
- targets: ['{{ mnemosyne_web_metrics_host }}:{{ mnemosyne_web_metrics_port }}']
|
||||
labels:
|
||||
component: web
|
||||
|
||||
# Pallas — each deployment is one scrape target (registry port).
|
||||
# Pallas uses a single process-global registry, so per-agent /metrics
|
||||
# endpoints serve the same snapshot; the `agent` dimension is carried
|
||||
# as a metric label, not a target. Targets are defined per
|
||||
# environment in pallas_metrics_targets (host_vars on the Prometheus
|
||||
# host); instances are differentiated by the `instance` label.
|
||||
{% if pallas_metrics_targets | default([]) %}
|
||||
- job_name: 'pallas'
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 15s
|
||||
static_configs: {{ pallas_metrics_targets | to_json }}
|
||||
{% endif %}
|
||||
|
||||
# Neo4j — stscoundrel/neo4j-apoc-exporter sidecar connects to the local
|
||||
# Neo4j over Bolt and exposes apoc.monitor.* (tx/ids/store) plus JVM
|
||||
# metrics on the standard metrics port (22094). Both Ariel (LLM memory
|
||||
# via neo4j-cypher MCP) and Umbriel (Mnemosyne graph+vector DB) use the
|
||||
# same port — they are differentiated by hostname only.
|
||||
# metrics. Targets are listed per-environment in neo4j_metrics_targets
|
||||
# (host_vars on the Prometheus host) — instances are differentiated by
|
||||
# hostname only.
|
||||
- job_name: 'neo4j'
|
||||
static_configs:
|
||||
- targets:
|
||||
- 'ariel.incus:22094'
|
||||
- 'umbriel.incus:22094'
|
||||
- targets: {{ neo4j_metrics_targets | to_json }}
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 15s
|
||||
|
||||
# Red Panda Approved Prometheus Configuration
|
||||
|
||||
Reference in New Issue
Block a user