feat(observability): add SearXNG, Argos, and Pallas monitoring
- Add SearXNG syslog ingestion and blackbox health probes on miranda and rosalind for per-host attributable failure detection
- Scrape Argos MCP application metrics from miranda
- Add Pallas dashboard panels for downstream availability and turn error ratios
This commit is contained in:
@@ -15,8 +15,7 @@ neo4j_syslog_port: 51414
|
||||
neo4j_user: neo4j
|
||||
neo4j_group: neo4j
|
||||
neo4j_directory: /srv/neo4j
|
||||
neo4j_auth_user: neo4j
|
||||
neo4j_auth_password: "{{ vault_neo4j_auth_password }}"
|
||||
neo4j_password: "{{ vault_neo4j_cypher_password }}"
|
||||
neo4j_http_port: 22084
|
||||
neo4j_bolt_port: 22074
|
||||
neo4j_metrics_port: 22094
|
||||
|
||||
@@ -10,21 +10,23 @@ services:
|
||||
- grafana_mcp
|
||||
- mcpo
|
||||
- neo4j_mcp
|
||||
- searxng
|
||||
|
||||
# Alloy
|
||||
alloy_log_level: "warn"
|
||||
argos_syslog_port: 51434
|
||||
neo4j_cypher_syslog_port: 51431
|
||||
grafana_mcp_syslog_port: 51433
|
||||
gitea_mcp_syslog_port: 51435
|
||||
argos_syslog_port: 51418
|
||||
neo4j_cypher_syslog_port: 51414
|
||||
grafana_mcp_syslog_port: 51413
|
||||
gitea_mcp_syslog_port: 51412
|
||||
searxng_syslog_port: 51419
|
||||
|
||||
# Argos MCP Configuration
|
||||
argos_user: argos
|
||||
argos_group: argos
|
||||
argos_directory: /srv/argos
|
||||
argos_port: 25534
|
||||
argos_port: 20861
|
||||
argos_log_level: INFO
|
||||
argos_searxng_instances: http://rosalind.incus:22089/
|
||||
argos_searxng_instances: http://miranda.incus:22089/,http://rosalind.incus:22089/
|
||||
argos_cache_ttl: 300
|
||||
argos_max_results: 10
|
||||
argos_request_timeout: 30.0
|
||||
@@ -48,7 +50,7 @@ neo4j_mcp_directory: /srv/neo4j_mcp
|
||||
grafana_mcp_user: grafana_mcp
|
||||
grafana_mcp_group: grafana_mcp
|
||||
grafana_mcp_directory: /srv/grafana_mcp
|
||||
grafana_mcp_port: 25533
|
||||
grafana_mcp_port: 22063
|
||||
grafana_mcp_grafana_host: prospero.incus
|
||||
grafana_mcp_grafana_port: 3000
|
||||
grafana_service_account_token: "{{ vault_grafana_service_account_token }}"
|
||||
@@ -57,21 +59,28 @@ grafana_service_account_token: "{{ vault_grafana_service_account_token }}"
|
||||
gitea_mcp_user: gitea_mcp
|
||||
gitea_mcp_group: gitea_mcp
|
||||
gitea_mcp_directory: /srv/gitea_mcp
|
||||
gitea_mcp_port: 25535
|
||||
gitea_mcp_port: 22062
|
||||
gitea_mcp_host: https://gitea.ouranos.helu.ca
|
||||
gitea_mcp_access_token: "{{ vault_gitea_mcp_access_token }}"
|
||||
|
||||
# Neo4j Cypher MCP
|
||||
neo4j_host: ariel.incus
|
||||
neo4j_bolt_port: 7687
|
||||
neo4j_auth_password: "{{ vault_neo4j_auth_password }}"
|
||||
neo4j_cypher_mcp_port: 25531
|
||||
|
||||
# Nike MCP
|
||||
nike_mcp_url: http://puck.incus:25576/mcp
|
||||
neo4j_bolt_port: 22074
|
||||
neo4j_cypher_password: "{{ vault_neo4j_cypher_password }}"
|
||||
neo4j_cypher_mcp_port: 22064
|
||||
neo4j_mcp_server_allowed_hosts: localhost,127.0.0.1,miranda.incus
|
||||
|
||||
# MCPO Config
|
||||
mcpo_user: mcpo
|
||||
mcpo_group: mcpo
|
||||
mcpo_directory: /srv/mcpo
|
||||
mcpo_port: 25530
|
||||
|
||||
# SearXNG Configuration
|
||||
searxng_user: searxng
|
||||
searxng_group: searxng
|
||||
searxng_directory: /srv/searxng
|
||||
searxng_port: 22089
|
||||
searxng_base_url: http://miranda.incus:22089/
|
||||
searxng_instance_name: "Ouranos Search"
|
||||
searxng_secret_key: "{{ vault_searxng_secret_key }}"
|
||||
|
||||
@@ -74,6 +74,22 @@ prometheus_targets:
|
||||
- 'rosalind.incus:9100'
|
||||
- 'umbriel.incus:9100'
|
||||
|
||||
# Neo4j scrape targets (neo4j-apoc-exporter sidecar on each Neo4j host)
|
||||
neo4j_metrics_targets:
|
||||
- 'ariel.incus:22094'
|
||||
- 'umbriel.incus:22094'
|
||||
|
||||
# Pallas scrape targets — one entry per Pallas deployment (registry
|
||||
# port). The `instance` label distinguishes deployments; the `agent`
|
||||
# dimension comes from labels on the metrics themselves.
|
||||
pallas_metrics_targets:
|
||||
- targets: ['caliban.incus:24000']
|
||||
labels: {instance: iolaus}
|
||||
- targets: ['caliban.incus:24100']
|
||||
labels: {instance: kottos}
|
||||
- targets: ['caliban.incus:24200']
|
||||
labels: {instance: mentor}
|
||||
|
||||
# Prometheus OAuth2-Proxy Sidecar
|
||||
prometheus_proxy_port: 9091
|
||||
prometheus_oauth2_proxy_dir: /etc/oauth2-proxy-prometheus
|
||||
@@ -127,10 +143,23 @@ pgadmin_oauth_client_id: "{{ vault_pgadmin_oauth_client_id }}"
|
||||
pgadmin_oauth_client_secret: "{{ vault_pgadmin_oauth_client_secret }}"
|
||||
|
||||
# ============================================================================
|
||||
# Casdoor Metrics (for Prometheus scraping)
|
||||
# Prometheus Metrics Scraping
|
||||
# ============================================================================
|
||||
|
||||
casdoor_metrics_host: "titania.incus"
|
||||
# Casdoor
|
||||
casdoor_metrics_host: titania.incus
|
||||
casdoor_metrics_port: 22081
|
||||
casdoor_prometheus_access_key: "{{ vault_casdoor_prometheus_access_key }}"
|
||||
casdoor_prometheus_access_secret: "{{ vault_casdoor_prometheus_access_secret }}"
|
||||
|
||||
# Daedalus Metrics
|
||||
daedalus_metrics_host: caliban.incus
|
||||
daedalus_metrics_port: 23081
|
||||
|
||||
# Mnemosyne — two scrape targets:
|
||||
# app: Django /metrics via nginx (django-prometheus + custom pipeline/MCP counters)
|
||||
# web: nginx-prometheus-exporter sidecar (nginx stub_status → Prometheus format)
|
||||
mnemosyne_app_metrics_host: caliban.incus
|
||||
mnemosyne_app_metrics_port: 23181
|
||||
mnemosyne_web_metrics_host: caliban.incus
|
||||
mnemosyne_web_metrics_port: 23191
|
||||
|
||||
@@ -37,14 +37,17 @@ daedalus_syslog_port: 51430
|
||||
|
||||
# Daedalus (FastAPI on puck, behind nginx)
|
||||
daedalus_metrics_host: "puck.incus"
|
||||
daedalus_metrics_port: 22181
|
||||
daedalus_metrics_port: 23081
|
||||
|
||||
# Mnemosyne — /metrics is served by nginx (mnemosyne-web:23181) and
|
||||
# proxied to the Django app container, which owns the single
|
||||
# prometheus_client process registry that both django-prometheus
|
||||
# (HTTP / Celery) and the MCP server's tool-call counters write to.
|
||||
mnemosyne_metrics_host: "puck.incus"
|
||||
mnemosyne_metrics_port: 23181
|
||||
# Mnemosyne — two metrics surfaces:
|
||||
# app (23181): /metrics served by nginx → Django app container, which owns
|
||||
# the single prometheus_client process registry that both django-prometheus
|
||||
# (HTTP / Celery) and the MCP server's tool-call counters write to.
|
||||
# web (23191): nginx-prometheus-exporter sidecar scraping nginx stub_status.
|
||||
mnemosyne_app_metrics_host: "puck.incus"
|
||||
mnemosyne_app_metrics_port: 23181
|
||||
mnemosyne_web_metrics_host: "puck.incus"
|
||||
mnemosyne_web_metrics_port: 23191
|
||||
|
||||
# =============================================================================
|
||||
# Kottos Configuration (Pallas FastAgent runtime)
|
||||
|
||||
@@ -122,8 +122,8 @@ haproxy_backends:
|
||||
health_path: "/api/healthz"
|
||||
|
||||
- subdomain: "daedalus"
|
||||
backend_host: "puck.incus"
|
||||
backend_port: 20080
|
||||
backend_host: "caliban.incus"
|
||||
backend_port: 20081
|
||||
health_path: "/ready/"
|
||||
timeout_server: 120s
|
||||
|
||||
@@ -133,8 +133,8 @@ haproxy_backends:
|
||||
health_path: "/chat"
|
||||
|
||||
- subdomain: "mnemosyne"
|
||||
backend_host: "puck.incus"
|
||||
backend_port: 23181
|
||||
backend_host: "caliban.incus"
|
||||
backend_port: 23081
|
||||
health_path: "/ready/"
|
||||
|
||||
- subdomain: "nextcloud"
|
||||
|
||||
@@ -19,8 +19,7 @@ neo4j_syslog_port: 51414
|
||||
neo4j_user: neo4j
|
||||
neo4j_group: neo4j
|
||||
neo4j_directory: /srv/neo4j
|
||||
neo4j_auth_user: neo4j
|
||||
neo4j_auth_password: "{{ vault_mnemosyne_neo4j_auth_password }}"
|
||||
neo4j_password: "{{ vault_neo4j_mnemosyne_password }}"
|
||||
neo4j_http_port: 22084
|
||||
neo4j_bolt_port: 22074
|
||||
neo4j_metrics_port: 22094
|
||||
|
||||
Reference in New Issue
Block a user