feat(observability): add SearXNG, Argos, and Pallas monitoring

- Add SearXNG syslog ingestion and blackbox health probes on miranda
  and rosalind for per-host attributable failure detection
- Scrape Argos MCP application metrics from miranda
- Add Pallas dashboard panels for downstream availability and turn
  error ratios
This commit is contained in:
2026-05-24 23:52:53 -04:00
parent 43fae203d1
commit 3c2f8c57ca
24 changed files with 1968 additions and 938 deletions

307
dashboards/argos.json Normal file
View File

@@ -0,0 +1,307 @@
{
"title": "Argos",
"uid": "argos",
"tags": ["argos", "mcp", "searxng", "ouranos"],
"timezone": "browser",
"schemaVersion": 39,
"version": 1,
"editable": true,
"fiscalYearStartMonth": 0,
"weekStart": "",
"refresh": "30s",
"time": {"from": "now-1h", "to": "now"},
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [],
"targetBlank": true,
"title": "SearXNG dashboard",
"tooltip": "SearXNG instance probes (miranda, rosalind)",
"type": "link",
"url": "/d/searxng"
}
],
"templating": {
"list": [
{
"name": "prom",
"type": "datasource",
"query": "prometheus",
"current": {"selected": false, "text": "Prometheus", "value": "Prometheus"},
"hide": 0,
"label": "Prometheus datasource"
},
{
"name": "loki",
"type": "datasource",
"query": "loki",
"current": {"selected": false, "text": "Loki", "value": "Loki"},
"hide": 0,
"label": "Loki datasource"
},
{
"name": "instance",
"type": "query",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"query": "label_values(up{job=\"argos\"}, instance)",
"refresh": 1,
"includeAll": true,
"multi": true,
"current": {"selected": true, "text": "All", "value": "$__all"},
"label": "Argos host"
}
]
},
"panels": [
{
"id": 1,
"type": "row",
"title": "Health",
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}
},
{
"id": 2,
"type": "stat",
"title": "Argos up",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 4, "x": 0, "y": 1},
"targets": [
{"refId": "A", "expr": "up{job=\"argos\", instance=~\"$instance\"}", "legendFormat": "{{instance}}"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "background", "textMode": "value_and_name"},
"fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}}
},
{
"id": 3,
"type": "stat",
"title": "SearXNG instances healthy (per Argos)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 6, "x": 4, "y": 1},
"targets": [
{"refId": "A", "expr": "sum by (instance) (argos_searxng_instance_up{instance=~\"$instance\"})", "legendFormat": "{{instance}}"},
{"refId": "B", "expr": "count by (instance) (argos_searxng_instance_up{instance=~\"$instance\"})", "legendFormat": "{{instance}} total", "hide": true}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "value_and_name", "colorMode": "value"},
"fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "orange", "value": 1}, {"color": "green", "value": 2}]}}}
},
{
"id": 4,
"type": "stat",
"title": "Tool error ratio (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 4, "x": 10, "y": 1},
"targets": [
{"refId": "A", "expr": "sum(rate(argos_tool_calls_total{status=\"error\", instance=~\"$instance\"}[5m])) / clamp_min(sum(rate(argos_tool_calls_total{instance=~\"$instance\"}[5m])), 0.0001)", "legendFormat": "errors"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value"},
"fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 0.05}, {"color": "red", "value": 0.20}]}}}
},
{
"id": 5,
"type": "stat",
"title": "Tool calls/sec (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 4, "x": 14, "y": 1},
"targets": [
{"refId": "A", "expr": "sum(rate(argos_tool_calls_total{instance=~\"$instance\"}[5m]))", "legendFormat": "calls/s"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value", "graphMode": "area"},
"fieldConfig": {"defaults": {"unit": "ops"}}
},
{
"id": 6,
"type": "stat",
"title": "Build",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 6, "x": 18, "y": 1},
"targets": [
{"refId": "A", "expr": "argos_build_info{instance=~\"$instance\"}", "legendFormat": "{{instance}} v{{version}}"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "name", "colorMode": "none"},
"fieldConfig": {"defaults": {"unit": "none"}}
},
{
"id": 10,
"type": "row",
"title": "Tools",
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 5}
},
{
"id": 11,
"type": "timeseries",
"title": "Tool calls/sec by tool (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 6},
"targets": [
{"refId": "A", "expr": "sum by (tool) (rate(argos_tool_calls_total{instance=~\"$instance\"}[5m]))", "legendFormat": "{{tool}}"}
],
"fieldConfig": {"defaults": {"unit": "ops"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 12,
"type": "timeseries",
"title": "Tool error ratio by tool (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 6},
"targets": [
{"refId": "A", "expr": "sum by (tool) (rate(argos_tool_calls_total{status=\"error\", instance=~\"$instance\"}[5m])) / clamp_min(sum by (tool) (rate(argos_tool_calls_total{instance=~\"$instance\"}[5m])), 0.0001)", "legendFormat": "{{tool}}"}
],
"fieldConfig": {"defaults": {"unit": "percentunit"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 13,
"type": "timeseries",
"title": "Tool latency p50 / p95 / p99 (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 14},
"targets": [
{"refId": "A", "expr": "histogram_quantile(0.50, sum by (le) (rate(argos_tool_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "legendFormat": "p50"},
{"refId": "B", "expr": "histogram_quantile(0.95, sum by (le) (rate(argos_tool_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "legendFormat": "p95"},
{"refId": "C", "expr": "histogram_quantile(0.99, sum by (le) (rate(argos_tool_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "legendFormat": "p99"}
],
"fieldConfig": {"defaults": {"unit": "s"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 14,
"type": "timeseries",
"title": "Tool latency p95 by tool (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 14},
"targets": [
{"refId": "A", "expr": "histogram_quantile(0.95, sum by (le, tool) (rate(argos_tool_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "legendFormat": "{{tool}}"}
],
"fieldConfig": {"defaults": {"unit": "s"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 20,
"type": "row",
"title": "Upstream SearXNG",
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 22}
},
{
"id": 21,
"type": "table",
"title": "SearXNG instances (per-Argos view)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 6, "w": 12, "x": 0, "y": 23},
"targets": [
{"refId": "A", "expr": "argos_searxng_instance_up{instance=~\"$instance\"}", "legendFormat": "{{searxng_instance}}", "format": "table", "instant": true}
],
"transformations": [
{"id": "organize", "options": {"excludeByName": {"Time": true, "__name__": true, "job": true, "environment": true, "hostname": true}}}
],
"fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "custom": {"cellOptions": {"type": "color-background"}}}}
},
{
"id": 22,
"type": "timeseries",
"title": "Upstream SearXNG requests/sec by instance (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 6, "w": 12, "x": 12, "y": 23},
"targets": [
{"refId": "A", "expr": "sum by (instance, searxng_instance) (rate(argos_searxng_requests_total{instance=~\"$instance\"}[5m]))", "legendFormat": "{{instance}} → {{searxng_instance}}"}
],
"fieldConfig": {"defaults": {"unit": "ops"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 23,
"type": "timeseries",
"title": "Upstream SearXNG error ratio by instance (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 6, "w": 12, "x": 0, "y": 29},
"targets": [
{"refId": "A", "expr": "sum by (searxng_instance) (rate(argos_searxng_requests_total{status=\"error\", instance=~\"$instance\"}[5m])) / clamp_min(sum by (searxng_instance) (rate(argos_searxng_requests_total{instance=~\"$instance\"}[5m])), 0.0001)", "legendFormat": "{{searxng_instance}}"}
],
"fieldConfig": {"defaults": {"unit": "percentunit"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 24,
"type": "timeseries",
"title": "Upstream SearXNG latency p95 by instance (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 6, "w": 12, "x": 12, "y": 29},
"targets": [
{"refId": "A", "expr": "histogram_quantile(0.95, sum by (le, searxng_instance) (rate(argos_searxng_request_duration_seconds_bucket{instance=~\"$instance\"}[5m])))", "legendFormat": "{{searxng_instance}} p95"}
],
"fieldConfig": {"defaults": {"unit": "s"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 30,
"type": "row",
"title": "Cache & webpage fetch",
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 35}
},
{
"id": 31,
"type": "stat",
"title": "Cache hit ratio (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 6, "x": 0, "y": 36},
"targets": [
{"refId": "A", "expr": "sum(rate(argos_cache_operations_total{operation=\"get\", result=\"hit\", instance=~\"$instance\"}[5m])) / clamp_min(sum(rate(argos_cache_operations_total{operation=\"get\", instance=~\"$instance\"}[5m])), 0.0001)", "legendFormat": "hits"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value", "graphMode": "area"},
"fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "orange", "value": 0.10}, {"color": "green", "value": 0.30}]}}}
},
{
"id": 32,
"type": "timeseries",
"title": "Cache ops/sec by result (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 9, "x": 6, "y": 36},
"targets": [
{"refId": "A", "expr": "sum by (operation, result) (rate(argos_cache_operations_total{instance=~\"$instance\"}[5m]))", "legendFormat": "{{operation}}/{{result}}"}
],
"fieldConfig": {"defaults": {"unit": "ops"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 33,
"type": "timeseries",
"title": "Webpage fetch outcomes/sec (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 9, "x": 15, "y": 36},
"targets": [
{"refId": "A", "expr": "sum by (status) (rate(argos_webpage_fetch_total{instance=~\"$instance\"}[5m]))", "legendFormat": "{{status}}"}
],
"fieldConfig": {"defaults": {"unit": "ops"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 90,
"type": "row",
"title": "Logs",
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 44}
},
{
"id": 91,
"type": "logs",
"title": "argos (Loki)",
"datasource": {"type": "loki", "uid": "${loki}"},
"gridPos": {"h": 12, "w": 24, "x": 0, "y": 45},
"targets": [
{"refId": "A", "expr": "{job=\"argos\"}"}
],
"options": {"showTime": true, "wrapLogMessage": true, "enableLogDetails": true, "dedupStrategy": "none"}
}
]
}

View File

@@ -0,0 +1,702 @@
{
"title": "Daedalus Stack",
"uid": "daedalus-stack",
"tags": ["daedalus", "mnemosyne", "pallas", "ouranos"],
"timezone": "browser",
"schemaVersion": 39,
"version": 1,
"editable": true,
"fiscalYearStartMonth": 0,
"weekStart": "",
"refresh": "30s",
"time": {"from": "now-1h", "to": "now"},
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [],
"targetBlank": true,
"title": "Neo4j dashboard",
"tooltip": "Detailed Neo4j metrics (ariel, umbriel)",
"type": "link",
"url": "/d/neo4j"
},
{
"asDropdown": false,
"icon": "doc",
"includeVars": true,
"keepTime": true,
"tags": [],
"targetBlank": true,
"title": "Explore Logs",
"tooltip": "Loki: daedalus + mnemosyne + pallas",
"type": "link",
"url": "/explore?orgId=1&left=%7B%22datasource%22:%22Loki%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22expr%22:%22%7Bservice%3D~%5C%22daedalus%7Cmnemosyne%7Cpallas%5C%22%7D%22%7D%5D%7D"
}
],
"templating": {
"list": [
{
"name": "prom",
"type": "datasource",
"query": "prometheus",
"current": {"selected": false, "text": "Prometheus", "value": "Prometheus"},
"hide": 0,
"label": "Prometheus datasource"
},
{
"name": "loki",
"type": "datasource",
"query": "loki",
"current": {"selected": false, "text": "Loki", "value": "Loki"},
"hide": 0,
"label": "Loki datasource"
},
{
"name": "pallas_inst",
"type": "query",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"query": "label_values(up{job=\"pallas\"}, instance)",
"refresh": 1,
"includeAll": true,
"multi": true,
"current": {"selected": true, "text": "All", "value": "$__all"},
"label": "Pallas instance"
},
{
"name": "agent",
"type": "query",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"query": "label_values(pallas_send_message_total{instance=~\"$pallas_inst\"}, agent)",
"refresh": 2,
"includeAll": true,
"multi": true,
"current": {"selected": true, "text": "All", "value": "$__all"},
"label": "Agent"
}
]
},
"panels": [
{
"id": 100,
"type": "row",
"title": "Summary",
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}
},
{
"id": 101,
"type": "stat",
"title": "Daedalus",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 3, "x": 0, "y": 1},
"targets": [
{"refId": "A", "expr": "up{job=\"daedalus\"}", "legendFormat": "{{instance}}"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "background", "textMode": "value_and_name"},
"fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}}
},
{
"id": 102,
"type": "stat",
"title": "Mnemosyne app",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 3, "x": 3, "y": 1},
"targets": [
{"refId": "A", "expr": "up{job=\"mnemosyne\", component=\"app\"}", "legendFormat": "app"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "background", "textMode": "value_and_name"},
"fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}}
},
{
"id": 103,
"type": "stat",
"title": "Mnemosyne web",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 3, "x": 6, "y": 1},
"targets": [
{"refId": "A", "expr": "up{job=\"mnemosyne\", component=\"web\"}", "legendFormat": "web"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "background", "textMode": "value_and_name"},
"fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}}
},
{
"id": 104,
"type": "stat",
"title": "Pallas up ratio",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 3, "x": 9, "y": 1},
"targets": [
{"refId": "A", "expr": "sum(up{job=\"pallas\"}) / count(up{job=\"pallas\"})", "legendFormat": "up ratio"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value"},
"fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "orange", "value": 0.67}, {"color": "green", "value": 1}]}}}
},
{
"id": 105,
"type": "stat",
"title": "Agents healthy",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 3, "x": 12, "y": 1},
"targets": [
{"refId": "A", "expr": "sum(daedalus_agents_by_health{status=\"ok\"}) / clamp_min(sum(daedalus_agents_total), 1)", "legendFormat": "healthy"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value"},
"fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "orange", "value": 0.7}, {"color": "green", "value": 1}]}}}
},
{
"id": 106,
"type": "stat",
"title": "Chat p95 (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 3, "x": 15, "y": 1},
"targets": [
{"refId": "A", "expr": "histogram_quantile(0.95, sum by (le) (rate(daedalus_agent_response_duration_seconds_bucket{source=\"chat\"}[5m])))", "legendFormat": "chat p95"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value"},
"fieldConfig": {"defaults": {"unit": "s", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 10}, {"color": "red", "value": 30}]}}}
},
{
"id": 107,
"type": "timeseries",
"title": "Stack up (last hour)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 6, "x": 18, "y": 1},
"targets": [
{"refId": "A", "expr": "up{job=~\"daedalus|mnemosyne|pallas\"}", "legendFormat": "{{job}} {{instance}} {{component}}"}
],
"fieldConfig": {"defaults": {"unit": "short", "min": 0, "max": 1, "custom": {"drawStyle": "line", "lineInterpolation": "stepBefore", "fillOpacity": 10}}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 200,
"type": "row",
"title": "Daedalus",
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 5}
},
{
"id": 201,
"type": "stat",
"title": "Daedalus up",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 4, "x": 0, "y": 6},
"targets": [
{"refId": "A", "expr": "daedalus_up", "legendFormat": "daedalus"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "background", "textMode": "value_and_name"},
"fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}}
},
{
"id": 202,
"type": "stat",
"title": "5xx error rate (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 4, "x": 4, "y": 6},
"targets": [
{"refId": "A", "expr": "sum(rate(daedalus_http_requests_total{status=~\"5..\"}[5m])) / clamp_min(sum(rate(daedalus_http_requests_total[5m])), 0.0001)", "legendFormat": "5xx"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value"},
"fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 0.01}, {"color": "red", "value": 0.05}]}}}
},
{
"id": 203,
"type": "stat",
"title": "MCP connections active",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 4, "x": 8, "y": 6},
"targets": [
{"refId": "A", "expr": "sum(daedalus_mcp_connections_active)", "legendFormat": "active"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value"},
"fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}}
},
{
"id": 204,
"type": "stat",
"title": "Avg context window %",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 4, "x": 12, "y": 6},
"targets": [
{"refId": "A", "expr": "avg(daedalus_chat_context_pct)", "legendFormat": "avg"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value"},
"fieldConfig": {"defaults": {"unit": "percent", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 70}, {"color": "red", "value": 90}]}}}
},
{
"id": 205,
"type": "stat",
"title": "Tokens/sec (5m, total)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 8, "x": 16, "y": 6},
"targets": [
{"refId": "A", "expr": "sum(rate(daedalus_chat_tokens_total[5m]))", "legendFormat": "tok/s"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value", "graphMode": "area"},
"fieldConfig": {"defaults": {"unit": "short"}}
},
{
"id": 210,
"type": "timeseries",
"title": "Chat latency (p50/p95/p99)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 10},
"targets": [
{"refId": "A", "expr": "histogram_quantile(0.50, sum by (le) (rate(daedalus_agent_response_duration_seconds_bucket{source=\"chat\"}[5m])))", "legendFormat": "p50"},
{"refId": "B", "expr": "histogram_quantile(0.95, sum by (le) (rate(daedalus_agent_response_duration_seconds_bucket{source=\"chat\"}[5m])))", "legendFormat": "p95"},
{"refId": "C", "expr": "histogram_quantile(0.99, sum by (le) (rate(daedalus_agent_response_duration_seconds_bucket{source=\"chat\"}[5m])))", "legendFormat": "p99"}
],
"fieldConfig": {"defaults": {"unit": "s"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 211,
"type": "timeseries",
"title": "Voice pipeline p95 (STT / agent / TTS / total)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 10},
"targets": [
{"refId": "A", "expr": "histogram_quantile(0.95, sum by (le) (rate(daedalus_voice_stt_duration_seconds_bucket[5m])))", "legendFormat": "stt"},
{"refId": "B", "expr": "histogram_quantile(0.95, sum by (le) (rate(daedalus_voice_agent_duration_seconds_bucket[5m])))", "legendFormat": "agent"},
{"refId": "C", "expr": "histogram_quantile(0.95, sum by (le) (rate(daedalus_voice_tts_duration_seconds_bucket[5m])))", "legendFormat": "tts"},
{"refId": "D", "expr": "histogram_quantile(0.95, sum by (le) (rate(daedalus_voice_pipeline_duration_seconds_bucket[5m])))", "legendFormat": "total"}
],
"fieldConfig": {"defaults": {"unit": "s", "custom": {"stacking": {"mode": "none"}}}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 220,
"type": "timeseries",
"title": "Pallas reach — MCP error ratio by server (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 18},
"targets": [
{"refId": "A", "expr": "sum by (server) (rate(daedalus_mcp_requests_total{status=\"error\"}[5m])) / clamp_min(sum by (server) (rate(daedalus_mcp_requests_total[5m])), 0.0001)", "legendFormat": "{{server}}"}
],
"fieldConfig": {"defaults": {"unit": "percentunit"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 221,
"type": "timeseries",
"title": "Mnemosyne reach — p95 latency by operation (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 18},
"targets": [
{"refId": "A", "expr": "histogram_quantile(0.95, sum by (le, operation) (rate(daedalus_mnemosyne_request_duration_seconds_bucket[5m])))", "legendFormat": "{{operation}} p95"},
{"refId": "B", "expr": "sum(rate(daedalus_mnemosyne_requests_total{status=\"error\"}[5m]))", "legendFormat": "errors/s (right)"}
],
"fieldConfig": {"defaults": {"unit": "s"}, "overrides": [{"matcher": {"id": "byName", "options": "errors/s (right)"}, "properties": [{"id": "unit", "value": "ops"}, {"id": "custom.axisPlacement", "value": "right"}]}]},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 230,
"type": "timeseries",
"title": "Token burn by direction (tokens/sec, 5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 26},
"targets": [
{"refId": "A", "expr": "sum by (direction) (rate(daedalus_chat_tokens_total[5m]))", "legendFormat": "{{direction}}"}
],
"fieldConfig": {"defaults": {"unit": "short", "custom": {"drawStyle": "line", "fillOpacity": 20, "stacking": {"mode": "normal"}}}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 231,
"type": "timeseries",
"title": "Mnemosyne ingest jobs (status, 5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 26},
"targets": [
{"refId": "A", "expr": "sum by (status) (rate(daedalus_mnemosyne_ingest_jobs_total[5m]))", "legendFormat": "{{status}}"}
],
"fieldConfig": {"defaults": {"unit": "ops"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 300,
"type": "row",
"title": "Mnemosyne",
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 34}
},
{
"id": 301,
"type": "stat",
"title": "App",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 4, "x": 0, "y": 35},
"targets": [
{"refId": "A", "expr": "up{job=\"mnemosyne\", component=\"app\"}", "legendFormat": "app"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "background", "textMode": "value_and_name"},
"fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}}
},
{
"id": 302,
"type": "stat",
"title": "Web (nginx)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 4, "x": 4, "y": 35},
"targets": [
{"refId": "A", "expr": "up{job=\"mnemosyne\", component=\"web\"}", "legendFormat": "web"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "background", "textMode": "value_and_name"},
"fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}}
},
{
"id": 303,
"type": "stat",
"title": "Search rate (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 4, "x": 8, "y": 35},
"targets": [
{"refId": "A", "expr": "sum(rate(mnemosyne_search_requests_total[5m]))", "legendFormat": "req/s"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value", "graphMode": "area"},
"fieldConfig": {"defaults": {"unit": "reqps"}}
},
{
"id": 304,
"type": "stat",
"title": "Embedding queue depth",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 4, "x": 12, "y": 35},
"targets": [
{"refId": "A", "expr": "mnemosyne_embedding_queue_size", "legendFormat": "queue"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value", "graphMode": "area"},
"fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 10}, {"color": "red", "value": 100}]}}}
},
{
"id": 305,
"type": "stat",
"title": "Pipeline in-progress",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 4, "x": 16, "y": 35},
"targets": [
{"refId": "A", "expr": "mnemosyne_pipeline_items_in_progress", "legendFormat": "in-flight"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value", "graphMode": "area"},
"fieldConfig": {"defaults": {"unit": "short"}}
},
{
"id": 306,
"type": "stat",
"title": "MCP tool error rate (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 4, "x": 20, "y": 35},
"targets": [
{"refId": "A", "expr": "sum(rate(mcp_tool_invocations_total{status=\"error\"}[5m])) / clamp_min(sum(rate(mcp_tool_invocations_total[5m])), 0.0001)", "legendFormat": "err"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value"},
"fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 0.01}, {"color": "red", "value": 0.05}]}}}
},
{
"id": 310,
"type": "timeseries",
"title": "Search rate by type (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 39},
"targets": [
{"refId": "A", "expr": "sum by (search_type) (rate(mnemosyne_search_requests_total[5m]))", "legendFormat": "{{search_type}}"}
],
"fieldConfig": {"defaults": {"unit": "reqps"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 311,
"type": "timeseries",
"title": "Search latency p95 by type (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 39},
"targets": [
{"refId": "A", "expr": "histogram_quantile(0.95, sum by (le, search_type) (rate(mnemosyne_search_duration_seconds_bucket[5m])))", "legendFormat": "{{search_type}} p95"},
{"refId": "B", "expr": "histogram_quantile(0.95, sum by (le) (rate(mnemosyne_search_total_duration_seconds_bucket[5m])))", "legendFormat": "end-to-end p95"}
],
"fieldConfig": {"defaults": {"unit": "s"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 320,
"type": "timeseries",
"title": "Embedding queue depth over time",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 8, "x": 0, "y": 47},
"targets": [
{"refId": "A", "expr": "mnemosyne_embedding_queue_size", "legendFormat": "queue"}
],
"fieldConfig": {"defaults": {"unit": "short", "custom": {"drawStyle": "line", "fillOpacity": 20}}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 321,
"type": "timeseries",
"title": "Embeddings generated (per sec, by model)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 8, "x": 8, "y": 47},
"targets": [
{"refId": "A", "expr": "sum by (model_name) (rate(mnemosyne_embeddings_generated_total[5m]))", "legendFormat": "{{model_name}}"}
],
"fieldConfig": {"defaults": {"unit": "ops"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 322,
"type": "timeseries",
"title": "Pipeline items (per sec, by status)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 8, "x": 16, "y": 47},
"targets": [
{"refId": "A", "expr": "sum by (status) (rate(mnemosyne_pipeline_items_total[5m]))", "legendFormat": "{{status}}"},
{"refId": "B", "expr": "sum by (error_type) (rate(mnemosyne_embedding_api_errors_total[5m]))", "legendFormat": "api err: {{error_type}}"}
],
"fieldConfig": {"defaults": {"unit": "ops"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 330,
"type": "timeseries",
"title": "Neo4j @ umbriel — transactions (rate / open)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 55},
"targets": [
{"refId": "A", "expr": "rate(neo4j_monitor_tx_totalOpenedTx{instance=~\"umbriel.*\"}[5m])", "legendFormat": "{{instance}} open rate"},
{"refId": "B", "expr": "neo4j_monitor_tx_currentOpenedTx{instance=~\"umbriel.*\"}", "legendFormat": "{{instance}} current open"}
],
"fieldConfig": {"defaults": {"unit": "short"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 331,
"type": "stat",
"title": "Neo4j @ umbriel — rollback ratio (10m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 6, "x": 12, "y": 55},
"targets": [
{"refId": "A", "expr": "rate(neo4j_monitor_tx_rolledBackTx{instance=~\"umbriel.*\"}[10m]) / clamp_min(rate(neo4j_monitor_tx_totalOpenedTx{instance=~\"umbriel.*\"}[10m]), 0.0001)", "legendFormat": "{{instance}}"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "value_and_name"},
"fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 0.05}, {"color": "red", "value": 0.10}]}}}
},
{
"id": 332,
"type": "stat",
"title": "Neo4j @ umbriel — store size",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 6, "x": 18, "y": 55},
"targets": [
{"refId": "A", "expr": "neo4j_monitor_store_totalStoreSize{instance=~\"umbriel.*\"}", "legendFormat": "{{instance}}"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "value_and_name"},
"fieldConfig": {"defaults": {"unit": "bytes"}}
},
{
"id": 400,
"type": "row",
"title": "Pallas Agents",
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 63}
},
{
"id": 401,
"type": "stat",
"title": "Instance up",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 6, "x": 0, "y": 64},
"targets": [
{"refId": "A", "expr": "up{job=\"pallas\", instance=~\"$pallas_inst\"}", "legendFormat": "{{instance}}"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": ""}, "colorMode": "background", "textMode": "value_and_name"},
"fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}}
},
{
"id": 402,
"type": "stat",
"title": "Aggregate agent health (min)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 6, "x": 6, "y": 64},
"targets": [
{"refId": "A", "expr": "min by (instance) (pallas_agent_health_status{instance=~\"$pallas_inst\"})", "legendFormat": "{{instance}}"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value_and_name"},
"fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": "ERROR", "color": "red"}, "0.5": {"text": "DEGRADED", "color": "orange"}, "1": {"text": "OK", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "orange", "value": 0.5}, {"color": "green", "value": 1}]}}}
},
{
"id": 403,
"type": "stat",
"title": "Downstream MCPs up (ratio)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 6, "x": 12, "y": 64},
"targets": [
{"refId": "A", "expr": "avg by (instance) (pallas_downstream_up{instance=~\"$pallas_inst\"})", "legendFormat": "{{instance}}"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value_and_name"},
"fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "orange", "value": 0.5}, {"color": "green", "value": 1}]}}}
},
{
"id": 404,
"type": "stat",
"title": "Turn error ratio (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 6, "x": 18, "y": 64},
"targets": [
{"refId": "A", "expr": "(sum by (instance) (rate(pallas_send_message_total{outcome=\"error\", instance=~\"$pallas_inst\"}[5m])) or (sum by (instance) (rate(pallas_send_message_total{instance=~\"$pallas_inst\"}[5m])) * 0)) / clamp_min(sum by (instance) (rate(pallas_send_message_total{instance=~\"$pallas_inst\"}[5m])), 0.0001)", "legendFormat": "{{instance}}"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value_and_name"},
"fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 0.01}, {"color": "red", "value": 0.05}]}}}
},
{
"id": 410,
"type": "timeseries",
"title": "Turn latency p95 by agent (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 68},
"targets": [
{"refId": "A", "expr": "histogram_quantile(0.95, sum by (le, agent, instance) (rate(pallas_send_message_duration_seconds_bucket{instance=~\"$pallas_inst\", agent=~\"$agent\"}[5m])))", "legendFormat": "{{instance}}/{{agent}}"}
],
"fieldConfig": {"defaults": {"unit": "s"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 411,
"type": "table",
"title": "Long-running agents — p99 turn (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 68},
"targets": [
{"refId": "A", "expr": "histogram_quantile(0.99, sum by (agent, instance, le) (rate(pallas_send_message_duration_seconds_bucket{instance=~\"$pallas_inst\", agent=~\"$agent\"}[5m])))", "legendFormat": "", "format": "table", "instant": true}
],
"fieldConfig": {"defaults": {"unit": "s", "custom": {"align": "auto"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 30}, {"color": "red", "value": 60}]}, "color": {"mode": "thresholds"}}, "overrides": [{"matcher": {"id": "byName", "options": "Value"}, "properties": [{"id": "displayName", "value": "p99 (s)"}, {"id": "custom.cellOptions", "value": {"type": "color-background"}}]}]},
"options": {"showHeader": true, "sortBy": [{"displayName": "p99 (s)", "desc": true}]},
"transformations": [{"id": "organize", "options": {"excludeByName": {"Time": true, "__name__": true, "job": true}}}]
},
{
"id": 420,
"type": "timeseries",
"title": "Turn errors per agent (15m increase)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 76},
"targets": [
{"refId": "A", "expr": "sum by (agent, instance) (increase(pallas_send_message_total{outcome=\"error\", instance=~\"$pallas_inst\", agent=~\"$agent\"}[15m]))", "legendFormat": "{{instance}}/{{agent}}"}
],
"fieldConfig": {"defaults": {"unit": "short"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 421,
"type": "timeseries",
"title": "Tokens/sec by kind (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 76},
"targets": [
{"refId": "A", "expr": "sum by (kind) (rate(pallas_llm_tokens_total{instance=~\"$pallas_inst\", agent=~\"$agent\"}[5m]))", "legendFormat": "{{kind}}"}
],
"fieldConfig": {"defaults": {"unit": "short", "custom": {"drawStyle": "line", "fillOpacity": 20, "stacking": {"mode": "normal"}}}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 422,
"type": "table",
"title": "Top-burning agents (24h, input+output tokens)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 84},
"targets": [
{"refId": "A", "expr": "topk(10, sum by (agent, model, instance) (increase(pallas_llm_tokens_total{kind=~\"input|output\", instance=~\"$pallas_inst\", agent=~\"$agent\"}[24h])))", "legendFormat": "", "format": "table", "instant": true}
],
"fieldConfig": {"defaults": {"unit": "short"}, "overrides": [{"matcher": {"id": "byName", "options": "Value"}, "properties": [{"id": "displayName", "value": "tokens (24h)"}, {"id": "custom.cellOptions", "value": {"type": "gauge", "mode": "gradient"}}]}]},
"options": {"showHeader": true, "sortBy": [{"displayName": "tokens (24h)", "desc": true}]},
"transformations": [{"id": "organize", "options": {"excludeByName": {"Time": true, "__name__": true, "job": true}}}]
},
{
"id": 423,
"type": "stat",
"title": "Cache effectiveness (1h)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 6, "x": 12, "y": 84},
"targets": [
{"refId": "A", "expr": "sum(rate(pallas_llm_tokens_total{kind=\"cache_read\", instance=~\"$pallas_inst\", agent=~\"$agent\"}[1h])) / clamp_min(sum(rate(pallas_llm_tokens_total{kind=~\"input|cache_read\", instance=~\"$pallas_inst\", agent=~\"$agent\"}[1h])), 0.0001)", "legendFormat": "cache hit"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value", "graphMode": "area"},
"fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "orange", "value": 0.2}, {"color": "green", "value": 0.5}]}}}
},
{
"id": 424,
"type": "stat",
"title": "LLM turns/sec (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 6, "x": 18, "y": 84},
"targets": [
{"refId": "A", "expr": "sum(rate(pallas_llm_turns_total{instance=~\"$pallas_inst\", agent=~\"$agent\"}[5m]))", "legendFormat": "turns/s"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value", "graphMode": "area"},
"fieldConfig": {"defaults": {"unit": "ops"}}
},
{
"id": 430,
"type": "timeseries",
"title": "Cypher tool calls — rate by outcome (Pallas → ariel)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 92},
"targets": [
{"refId": "A", "expr": "sum by (outcome) (rate(pallas_tool_calls_total{server=\"neo4j_cypher\", instance=~\"$pallas_inst\", agent=~\"$agent\"}[5m]))", "legendFormat": "{{outcome}}"}
],
"fieldConfig": {"defaults": {"unit": "ops"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 431,
"type": "timeseries",
"title": "Cypher tool calls — p95 latency by agent",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 92},
"targets": [
{"refId": "A", "expr": "histogram_quantile(0.95, sum by (le, agent, instance) (rate(pallas_tool_call_duration_seconds_bucket{server=\"neo4j_cypher\", instance=~\"$pallas_inst\", agent=~\"$agent\"}[5m])))", "legendFormat": "{{instance}}/{{agent}}"}
],
"fieldConfig": {"defaults": {"unit": "s"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 440,
"type": "timeseries",
"title": "Neo4j @ ariel — transactions (rate / open)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 100},
"targets": [
{"refId": "A", "expr": "rate(neo4j_monitor_tx_totalOpenedTx{instance=~\"ariel.*\"}[5m])", "legendFormat": "{{instance}} open rate"},
{"refId": "B", "expr": "neo4j_monitor_tx_currentOpenedTx{instance=~\"ariel.*\"}", "legendFormat": "{{instance}} current open"}
],
"fieldConfig": {"defaults": {"unit": "short"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 441,
"type": "stat",
"title": "Neo4j @ ariel — rollback ratio (10m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 6, "x": 12, "y": 100},
"targets": [
{"refId": "A", "expr": "rate(neo4j_monitor_tx_rolledBackTx{instance=~\"ariel.*\"}[10m]) / clamp_min(rate(neo4j_monitor_tx_totalOpenedTx{instance=~\"ariel.*\"}[10m]), 0.0001)", "legendFormat": "{{instance}}"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "value_and_name"},
"fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 0.05}, {"color": "red", "value": 0.10}]}}}
},
{
"id": 442,
"type": "stat",
"title": "Neo4j @ ariel — store size",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 6, "x": 18, "y": 100},
"targets": [
{"refId": "A", "expr": "neo4j_monitor_store_totalStoreSize{instance=~\"ariel.*\"}", "legendFormat": "{{instance}}"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "value_and_name"},
"fieldConfig": {"defaults": {"unit": "bytes"}}
}
]
}

202
dashboards/searxng.json Normal file
View File

@@ -0,0 +1,202 @@
{
"title": "SearXNG",
"uid": "searxng",
"tags": ["searxng", "argos", "ouranos"],
"timezone": "browser",
"schemaVersion": 39,
"version": 1,
"editable": true,
"fiscalYearStartMonth": 0,
"weekStart": "",
"refresh": "30s",
"time": {"from": "now-1h", "to": "now"},
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [],
"targetBlank": true,
"title": "Argos dashboard",
"tooltip": "Argos MCP server using these SearXNG instances",
"type": "link",
"url": "/d/argos"
},
{
"asDropdown": false,
"icon": "doc",
"includeVars": true,
"keepTime": true,
"tags": [],
"targetBlank": true,
"title": "SearXNG logs",
"tooltip": "Loki: {job=\"searxng\"}",
"type": "link",
"url": "/explore?orgId=1&left=%7B%22datasource%22:%22Loki%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22expr%22:%22%7Bjob%3D%5C%22searxng%5C%22%7D%22%7D%5D%7D"
}
],
"templating": {
"list": [
{
"name": "prom",
"type": "datasource",
"query": "prometheus",
"current": {"selected": false, "text": "Prometheus", "value": "Prometheus"},
"hide": 0,
"label": "Prometheus datasource"
},
{
"name": "loki",
"type": "datasource",
"query": "loki",
"current": {"selected": false, "text": "Loki", "value": "Loki"},
"hide": 0,
"label": "Loki datasource"
},
{
"name": "host",
"type": "query",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"query": "label_values(probe_success{service=\"searxng\"}, hostname)",
"refresh": 1,
"includeAll": true,
"multi": true,
"current": {"selected": true, "text": "All", "value": "$__all"},
"label": "SearXNG host"
}
]
},
"panels": [
{
"id": 1,
"type": "row",
"title": "Independent probe (Alloy blackbox /healthz)",
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}
},
{
"id": 2,
"type": "stat",
"title": "SearXNG /healthz",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 5, "w": 8, "x": 0, "y": 1},
"targets": [
{"refId": "A", "expr": "probe_success{service=\"searxng\", hostname=~\"$host\"}", "legendFormat": "{{hostname}}"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "background", "textMode": "value_and_name"},
"fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}}
},
{
"id": 3,
"type": "stat",
"title": "Last probe HTTP status",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 5, "w": 8, "x": 8, "y": 1},
"targets": [
{"refId": "A", "expr": "probe_http_status_code{service=\"searxng\", hostname=~\"$host\"}", "legendFormat": "{{hostname}}"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value_and_name"},
"fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 200}, {"color": "orange", "value": 300}, {"color": "red", "value": 400}]}}}
},
{
"id": 4,
"type": "stat",
"title": "Probe duration (last)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 5, "w": 8, "x": 16, "y": 1},
"targets": [
{"refId": "A", "expr": "probe_duration_seconds{service=\"searxng\", hostname=~\"$host\"}", "legendFormat": "{{hostname}}"}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value_and_name", "graphMode": "area"},
"fieldConfig": {"defaults": {"unit": "s", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 1}, {"color": "red", "value": 3}]}}}
},
{
"id": 5,
"type": "timeseries",
"title": "Probe success over time",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 6},
"targets": [
{"refId": "A", "expr": "probe_success{service=\"searxng\", hostname=~\"$host\"}", "legendFormat": "{{hostname}}"}
],
"fieldConfig": {"defaults": {"unit": "short", "min": 0, "max": 1, "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 20}}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 6,
"type": "timeseries",
"title": "Probe duration over time",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 6},
"targets": [
{"refId": "A", "expr": "probe_duration_seconds{service=\"searxng\", hostname=~\"$host\"}", "legendFormat": "{{hostname}}"}
],
"fieldConfig": {"defaults": {"unit": "s"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 10,
"type": "row",
"title": "Argos's view of these instances",
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 14}
},
{
"id": 11,
"type": "timeseries",
"title": "argos_searxng_instance_up by SearXNG instance",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 15},
"targets": [
{"refId": "A", "expr": "argos_searxng_instance_up", "legendFormat": "{{searxng_instance}}"}
],
"fieldConfig": {"defaults": {"unit": "short", "min": 0, "max": 1, "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 20}}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 12,
"type": "timeseries",
"title": "Search latency p95 from Argos (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 15},
"targets": [
{"refId": "A", "expr": "histogram_quantile(0.95, sum by (le, searxng_instance) (rate(argos_searxng_request_duration_seconds_bucket[5m])))", "legendFormat": "{{searxng_instance}} p95"}
],
"fieldConfig": {"defaults": {"unit": "s"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 13,
"type": "timeseries",
"title": "Search request error ratio from Argos (5m)",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 8, "w": 24, "x": 0, "y": 23},
"targets": [
{"refId": "A", "expr": "(sum by (searxng_instance) (rate(argos_searxng_requests_total{status=\"error\"}[5m])) or (sum by (searxng_instance) (rate(argos_searxng_requests_total[5m])) * 0)) / clamp_min(sum by (searxng_instance) (rate(argos_searxng_requests_total[5m])), 0.0001)", "legendFormat": "{{searxng_instance}}"}
],
"fieldConfig": {"defaults": {"unit": "percentunit"}},
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 90,
"type": "row",
"title": "Logs",
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 31}
},
{
"id": 91,
"type": "logs",
"title": "searxng (Loki)",
"datasource": {"type": "loki", "uid": "${loki}"},
"gridPos": {"h": 12, "w": 24, "x": 0, "y": 32},
"targets": [
{"refId": "A", "expr": "{job=\"searxng\"}"}
],
"options": {"showTime": true, "wrapLogMessage": true, "enableLogDetails": true, "dedupStrategy": "none"}
}
]
}