{ "title": "Daedalus Stack", "uid": "daedalus-stack", "tags": ["daedalus", "mnemosyne", "pallas", "ouranos"], "timezone": "browser", "schemaVersion": 39, "version": 1, "editable": true, "fiscalYearStartMonth": 0, "weekStart": "", "refresh": "30s", "time": {"from": "now-1h", "to": "now"}, "links": [ { "asDropdown": false, "icon": "external link", "includeVars": true, "keepTime": true, "tags": [], "targetBlank": true, "title": "Neo4j dashboard", "tooltip": "Detailed Neo4j metrics (ariel, umbriel)", "type": "link", "url": "/d/neo4j" }, { "asDropdown": false, "icon": "doc", "includeVars": true, "keepTime": true, "tags": [], "targetBlank": true, "title": "Explore Logs", "tooltip": "Loki: daedalus + mnemosyne + pallas", "type": "link", "url": "/explore?orgId=1&left=%7B%22datasource%22:%22Loki%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22expr%22:%22%7Bservice%3D~%5C%22daedalus%7Cmnemosyne%7Cpallas%5C%22%7D%22%7D%5D%7D" } ], "templating": { "list": [ { "name": "prom", "type": "datasource", "query": "prometheus", "current": {"selected": false, "text": "Prometheus", "value": "Prometheus"}, "hide": 0, "label": "Prometheus datasource" }, { "name": "loki", "type": "datasource", "query": "loki", "current": {"selected": false, "text": "Loki", "value": "Loki"}, "hide": 0, "label": "Loki datasource" }, { "name": "pallas_inst", "type": "query", "datasource": {"type": "prometheus", "uid": "${prom}"}, "query": "label_values(up{job=\"pallas\"}, instance)", "refresh": 1, "includeAll": true, "multi": true, "current": {"selected": true, "text": "All", "value": "$__all"}, "label": "Pallas instance" }, { "name": "agent", "type": "query", "datasource": {"type": "prometheus", "uid": "${prom}"}, "query": "label_values(pallas_send_message_total{instance=~\"$pallas_inst\"}, agent)", "refresh": 2, "includeAll": true, "multi": true, "current": {"selected": true, "text": "All", "value": "$__all"}, "label": "Agent" } ] }, "panels": [ { "id": 100, "type": "row", "title": "Summary", "collapsed": 
false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0} }, { "id": 101, "type": "stat", "title": "Daedalus", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 3, "x": 0, "y": 1}, "targets": [ {"refId": "A", "expr": "up{job=\"daedalus\"}", "legendFormat": "{{instance}}"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "background", "textMode": "value_and_name"}, "fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}} }, { "id": 102, "type": "stat", "title": "Mnemosyne app", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 3, "x": 3, "y": 1}, "targets": [ {"refId": "A", "expr": "up{job=\"mnemosyne\", component=\"app\"}", "legendFormat": "app"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "background", "textMode": "value_and_name"}, "fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}} }, { "id": 103, "type": "stat", "title": "Mnemosyne web", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 3, "x": 6, "y": 1}, "targets": [ {"refId": "A", "expr": "up{job=\"mnemosyne\", component=\"web\"}", "legendFormat": "web"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "background", "textMode": "value_and_name"}, "fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}} }, { "id": 104, "type": "stat", "title": "Pallas up ratio", 
"datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 3, "x": 9, "y": 1}, "targets": [ {"refId": "A", "expr": "sum(up{job=\"pallas\"}) / count(up{job=\"pallas\"})", "legendFormat": "up ratio"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value"}, "fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "orange", "value": 0.67}, {"color": "green", "value": 1}]}}} }, { "id": 105, "type": "stat", "title": "Agents healthy", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 3, "x": 12, "y": 1}, "targets": [ {"refId": "A", "expr": "sum(daedalus_agents_by_health{status=\"ok\"}) / clamp_min(sum(daedalus_agents_total), 1)", "legendFormat": "healthy"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value"}, "fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "orange", "value": 0.7}, {"color": "green", "value": 1}]}}} }, { "id": 106, "type": "stat", "title": "Chat p95 (5m)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 3, "x": 15, "y": 1}, "targets": [ {"refId": "A", "expr": "histogram_quantile(0.95, sum by (le) (rate(daedalus_agent_response_duration_seconds_bucket{source=\"chat\"}[5m])))", "legendFormat": "chat p95"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value"}, "fieldConfig": {"defaults": {"unit": "s", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 10}, {"color": "red", "value": 30}]}}} }, { "id": 107, "type": "timeseries", "title": "Stack up (last hour)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 6, "x": 18, "y": 1}, "targets": [ {"refId": "A", "expr": "up{job=~\"daedalus|mnemosyne|pallas\"}", "legendFormat": 
"{{job}} {{instance}} {{component}}"} ], "fieldConfig": {"defaults": {"unit": "short", "min": 0, "max": 1, "custom": {"drawStyle": "line", "lineInterpolation": "stepBefore", "fillOpacity": 10}}}, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}} }, { "id": 200, "type": "row", "title": "Daedalus", "collapsed": false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 5} }, { "id": 201, "type": "stat", "title": "Daedalus up", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 4, "x": 0, "y": 6}, "targets": [ {"refId": "A", "expr": "daedalus_up", "legendFormat": "daedalus"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "background", "textMode": "value_and_name"}, "fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}} }, { "id": 202, "type": "stat", "title": "5xx error rate (5m)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 4, "x": 4, "y": 6}, "targets": [ {"refId": "A", "expr": "sum(rate(daedalus_http_requests_total{status=~\"5..\"}[5m])) / clamp_min(sum(rate(daedalus_http_requests_total[5m])), 0.0001)", "legendFormat": "5xx"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value"}, "fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 0.01}, {"color": "red", "value": 0.05}]}}} }, { "id": 203, "type": "stat", "title": "MCP connections active", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 4, "x": 8, "y": 6}, "targets": [ {"refId": "A", "expr": "sum(daedalus_mcp_connections_active)", "legendFormat": "active"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": 
"value", "textMode": "value"}, "fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}} }, { "id": 204, "type": "stat", "title": "Avg context window %", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 4, "x": 12, "y": 6}, "targets": [ {"refId": "A", "expr": "avg(daedalus_chat_context_pct)", "legendFormat": "avg"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value"}, "fieldConfig": {"defaults": {"unit": "percent", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 70}, {"color": "red", "value": 90}]}}} }, { "id": 205, "type": "stat", "title": "Tokens/sec (5m, total)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 8, "x": 16, "y": 6}, "targets": [ {"refId": "A", "expr": "sum(rate(daedalus_chat_tokens_total[5m]))", "legendFormat": "tok/s"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value", "graphMode": "area"}, "fieldConfig": {"defaults": {"unit": "short"}} }, { "id": 210, "type": "timeseries", "title": "Chat latency (p50/p95/p99)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 8, "w": 12, "x": 0, "y": 10}, "targets": [ {"refId": "A", "expr": "histogram_quantile(0.50, sum by (le) (rate(daedalus_agent_response_duration_seconds_bucket{source=\"chat\"}[5m])))", "legendFormat": "p50"}, {"refId": "B", "expr": "histogram_quantile(0.95, sum by (le) (rate(daedalus_agent_response_duration_seconds_bucket{source=\"chat\"}[5m])))", "legendFormat": "p95"}, {"refId": "C", "expr": "histogram_quantile(0.99, sum by (le) (rate(daedalus_agent_response_duration_seconds_bucket{source=\"chat\"}[5m])))", "legendFormat": "p99"} ], "fieldConfig": {"defaults": {"unit": "s"}}, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}} }, { 
"id": 211, "type": "timeseries", "title": "Voice pipeline p95 (STT / agent / TTS / total)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 10}, "targets": [ {"refId": "A", "expr": "histogram_quantile(0.95, sum by (le) (rate(daedalus_voice_stt_duration_seconds_bucket[5m])))", "legendFormat": "stt"}, {"refId": "B", "expr": "histogram_quantile(0.95, sum by (le) (rate(daedalus_voice_agent_duration_seconds_bucket[5m])))", "legendFormat": "agent"}, {"refId": "C", "expr": "histogram_quantile(0.95, sum by (le) (rate(daedalus_voice_tts_duration_seconds_bucket[5m])))", "legendFormat": "tts"}, {"refId": "D", "expr": "histogram_quantile(0.95, sum by (le) (rate(daedalus_voice_pipeline_duration_seconds_bucket[5m])))", "legendFormat": "total"} ], "fieldConfig": {"defaults": {"unit": "s", "custom": {"stacking": {"mode": "none"}}}}, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}} }, { "id": 220, "type": "timeseries", "title": "Pallas reach — MCP error ratio by server (5m)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 8, "w": 12, "x": 0, "y": 18}, "targets": [ {"refId": "A", "expr": "sum by (server) (rate(daedalus_mcp_requests_total{status=\"error\"}[5m])) / clamp_min(sum by (server) (rate(daedalus_mcp_requests_total[5m])), 0.0001)", "legendFormat": "{{server}}"} ], "fieldConfig": {"defaults": {"unit": "percentunit"}}, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}} }, { "id": 221, "type": "timeseries", "title": "Mnemosyne reach — p95 latency by operation (5m)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 18}, "targets": [ {"refId": "A", "expr": "histogram_quantile(0.95, sum by (le, operation) (rate(daedalus_mnemosyne_request_duration_seconds_bucket[5m])))", "legendFormat": "{{operation}} p95"}, {"refId": "B", "expr": 
"sum(rate(daedalus_mnemosyne_requests_total{status=\"error\"}[5m]))", "legendFormat": "errors/s (right)"} ], "fieldConfig": {"defaults": {"unit": "s"}, "overrides": [{"matcher": {"id": "byName", "options": "errors/s (right)"}, "properties": [{"id": "unit", "value": "ops"}, {"id": "custom.axisPlacement", "value": "right"}]}]}, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}} }, { "id": 230, "type": "timeseries", "title": "Token burn by direction (tokens/sec, 5m)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 8, "w": 12, "x": 0, "y": 26}, "targets": [ {"refId": "A", "expr": "sum by (direction) (rate(daedalus_chat_tokens_total[5m]))", "legendFormat": "{{direction}}"} ], "fieldConfig": {"defaults": {"unit": "short", "custom": {"drawStyle": "line", "fillOpacity": 20, "stacking": {"mode": "normal"}}}}, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}} }, { "id": 231, "type": "timeseries", "title": "Mnemosyne ingest jobs (status, 5m)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 26}, "targets": [ {"refId": "A", "expr": "sum by (status) (rate(daedalus_mnemosyne_ingest_jobs_total[5m]))", "legendFormat": "{{status}}"} ], "fieldConfig": {"defaults": {"unit": "ops"}}, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}} }, { "id": 300, "type": "row", "title": "Mnemosyne", "collapsed": false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 34} }, { "id": 301, "type": "stat", "title": "App", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 4, "x": 0, "y": 35}, "targets": [ {"refId": "A", "expr": "up{job=\"mnemosyne\", component=\"app\"}", "legendFormat": "app"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "background", "textMode": "value_and_name"}, "fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": 
{"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}} }, { "id": 302, "type": "stat", "title": "Web (nginx)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 4, "x": 4, "y": 35}, "targets": [ {"refId": "A", "expr": "up{job=\"mnemosyne\", component=\"web\"}", "legendFormat": "web"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "background", "textMode": "value_and_name"}, "fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}} }, { "id": 303, "type": "stat", "title": "Search rate (5m)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 4, "x": 8, "y": 35}, "targets": [ {"refId": "A", "expr": "sum(rate(mnemosyne_search_requests_total[5m]))", "legendFormat": "req/s"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value", "graphMode": "area"}, "fieldConfig": {"defaults": {"unit": "reqps"}} }, { "id": 304, "type": "stat", "title": "Embedding queue depth", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 4, "x": 12, "y": 35}, "targets": [ {"refId": "A", "expr": "mnemosyne_embedding_queue_size", "legendFormat": "queue"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value", "graphMode": "area"}, "fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 10}, {"color": "red", "value": 100}]}}} }, { "id": 305, "type": "stat", "title": "Pipeline in-progress", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 4, "x": 16, "y": 35}, "targets": [ 
{"refId": "A", "expr": "mnemosyne_pipeline_items_in_progress", "legendFormat": "in-flight"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value", "graphMode": "area"}, "fieldConfig": {"defaults": {"unit": "short"}} }, { "id": 306, "type": "stat", "title": "MCP tool error rate (5m)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 4, "x": 20, "y": 35}, "targets": [ {"refId": "A", "expr": "sum(rate(mcp_tool_invocations_total{status=\"error\"}[5m])) / clamp_min(sum(rate(mcp_tool_invocations_total[5m])), 0.0001)", "legendFormat": "err"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value"}, "fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 0.01}, {"color": "red", "value": 0.05}]}}} }, { "id": 310, "type": "timeseries", "title": "Search rate by type (5m)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 8, "w": 12, "x": 0, "y": 39}, "targets": [ {"refId": "A", "expr": "sum by (search_type) (rate(mnemosyne_search_requests_total[5m]))", "legendFormat": "{{search_type}}"} ], "fieldConfig": {"defaults": {"unit": "reqps"}}, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}} }, { "id": 311, "type": "timeseries", "title": "Search latency p95 by type (5m)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 39}, "targets": [ {"refId": "A", "expr": "histogram_quantile(0.95, sum by (le, search_type) (rate(mnemosyne_search_duration_seconds_bucket[5m])))", "legendFormat": "{{search_type}} p95"}, {"refId": "B", "expr": "histogram_quantile(0.95, sum by (le) (rate(mnemosyne_search_total_duration_seconds_bucket[5m])))", "legendFormat": "end-to-end p95"} ], "fieldConfig": {"defaults": {"unit": "s"}}, "options": {"legend": {"displayMode": "list", 
"placement": "bottom", "showLegend": true}} }, { "id": 320, "type": "timeseries", "title": "Embedding queue depth over time", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 8, "w": 8, "x": 0, "y": 47}, "targets": [ {"refId": "A", "expr": "mnemosyne_embedding_queue_size", "legendFormat": "queue"} ], "fieldConfig": {"defaults": {"unit": "short", "custom": {"drawStyle": "line", "fillOpacity": 20}}}, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}} }, { "id": 321, "type": "timeseries", "title": "Embeddings generated (per sec, by model)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 8, "w": 8, "x": 8, "y": 47}, "targets": [ {"refId": "A", "expr": "sum by (model_name) (rate(mnemosyne_embeddings_generated_total[5m]))", "legendFormat": "{{model_name}}"} ], "fieldConfig": {"defaults": {"unit": "ops"}}, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}} }, { "id": 322, "type": "timeseries", "title": "Pipeline items (per sec, by status)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 8, "w": 8, "x": 16, "y": 47}, "targets": [ {"refId": "A", "expr": "sum by (status) (rate(mnemosyne_pipeline_items_total[5m]))", "legendFormat": "{{status}}"}, {"refId": "B", "expr": "sum by (error_type) (rate(mnemosyne_embedding_api_errors_total[5m]))", "legendFormat": "api err: {{error_type}}"} ], "fieldConfig": {"defaults": {"unit": "ops"}}, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}} }, { "id": 330, "type": "timeseries", "title": "Neo4j @ umbriel — transactions (rate / open)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 8, "w": 12, "x": 0, "y": 55}, "targets": [ {"refId": "A", "expr": "rate(neo4j_monitor_tx_totalOpenedTx{instance=~\"umbriel.*\"}[5m])", "legendFormat": "{{instance}} open rate"}, {"refId": "B", "expr": 
"neo4j_monitor_tx_currentOpenedTx{instance=~\"umbriel.*\"}", "legendFormat": "{{instance}} current open"} ], "fieldConfig": {"defaults": {"unit": "short"}}, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}} }, { "id": 331, "type": "stat", "title": "Neo4j @ umbriel — rollback ratio (10m)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 6, "x": 12, "y": 55}, "targets": [ {"refId": "A", "expr": "rate(neo4j_monitor_tx_rolledBackTx{instance=~\"umbriel.*\"}[10m]) / clamp_min(rate(neo4j_monitor_tx_totalOpenedTx{instance=~\"umbriel.*\"}[10m]), 0.0001)", "legendFormat": "{{instance}}"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "value_and_name"}, "fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 0.05}, {"color": "red", "value": 0.10}]}}} }, { "id": 332, "type": "stat", "title": "Neo4j @ umbriel — store size", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 6, "x": 18, "y": 55}, "targets": [ {"refId": "A", "expr": "neo4j_monitor_store_totalStoreSize{instance=~\"umbriel.*\"}", "legendFormat": "{{instance}}"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "value_and_name"}, "fieldConfig": {"defaults": {"unit": "bytes"}} }, { "id": 400, "type": "row", "title": "Pallas Agents", "collapsed": false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 63} }, { "id": 401, "type": "stat", "title": "Instance up", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 6, "x": 0, "y": 64}, "targets": [ {"refId": "A", "expr": "up{job=\"pallas\", instance=~\"$pallas_inst\"}", "legendFormat": "{{instance}}"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": ""}, "colorMode": "background", "textMode": "value_and_name"}, "fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": 
"DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}} }, { "id": 402, "type": "stat", "title": "Aggregate agent health (min)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 6, "x": 6, "y": 64}, "targets": [ {"refId": "A", "expr": "min by (instance) (pallas_agent_health_status{instance=~\"$pallas_inst\"})", "legendFormat": "{{instance}}"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value_and_name"}, "fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": "ERROR", "color": "red"}, "0.5": {"text": "DEGRADED", "color": "orange"}, "1": {"text": "OK", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "orange", "value": 0.5}, {"color": "green", "value": 1}]}}} }, { "id": 403, "type": "stat", "title": "Downstream MCPs up (ratio)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 6, "x": 12, "y": 64}, "targets": [ {"refId": "A", "expr": "sum by (instance) (pallas_downstream_up{instance=~\"$pallas_inst\"}) / clamp_min(count by (instance) (pallas_downstream_up{instance=~\"$pallas_inst\"}), 1)", "legendFormat": "{{instance}}"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value_and_name"}, "fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "orange", "value": 0.5}, {"color": "green", "value": 1}]}}} }, { "id": 404, "type": "stat", "title": "Turn error ratio (5m)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 6, "x": 18, "y": 64}, "targets": [ {"refId": "A", "expr": "sum by (instance) (rate(pallas_send_message_total{outcome=\"error\", instance=~\"$pallas_inst\"}[5m])) / clamp_min(sum by (instance) 
(rate(pallas_send_message_total{instance=~\"$pallas_inst\"}[5m])), 0.0001)", "legendFormat": "{{instance}}"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value_and_name"}, "fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 0.01}, {"color": "red", "value": 0.05}]}}} }, { "id": 410, "type": "timeseries", "title": "Turn latency p95 by agent (5m)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 8, "w": 12, "x": 0, "y": 68}, "targets": [ {"refId": "A", "expr": "histogram_quantile(0.95, sum by (le, agent, instance) (rate(pallas_send_message_duration_seconds_bucket{instance=~\"$pallas_inst\", agent=~\"$agent\"}[5m])))", "legendFormat": "{{instance}}/{{agent}}"} ], "fieldConfig": {"defaults": {"unit": "s"}}, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}} }, { "id": 411, "type": "table", "title": "Long-running agents — p99 turn (5m)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 68}, "targets": [ {"refId": "A", "expr": "histogram_quantile(0.99, sum by (agent, instance, le) (rate(pallas_send_message_duration_seconds_bucket{instance=~\"$pallas_inst\", agent=~\"$agent\"}[5m])))", "legendFormat": "", "format": "table", "instant": true} ], "fieldConfig": {"defaults": {"unit": "s", "custom": {"align": "auto"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 30}, {"color": "red", "value": 60}]}, "color": {"mode": "thresholds"}}, "overrides": [{"matcher": {"id": "byName", "options": "Value"}, "properties": [{"id": "displayName", "value": "p99 (s)"}, {"id": "custom.cellOptions", "value": {"type": "color-background"}}]}]}, "options": {"showHeader": true, "sortBy": [{"displayName": "p99 (s)", "desc": true}]}, "transformations": [{"id": "organize", "options": {"excludeByName": 
{"Time": true, "__name__": true, "job": true}}}] }, { "id": 420, "type": "timeseries", "title": "Turn errors per agent (15m increase)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 8, "w": 12, "x": 0, "y": 76}, "targets": [ {"refId": "A", "expr": "sum by (agent, instance) (increase(pallas_send_message_total{outcome=\"error\", instance=~\"$pallas_inst\", agent=~\"$agent\"}[15m]))", "legendFormat": "{{instance}}/{{agent}}"} ], "fieldConfig": {"defaults": {"unit": "short"}}, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}} }, { "id": 421, "type": "timeseries", "title": "Tokens/sec by kind (5m)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 76}, "targets": [ {"refId": "A", "expr": "sum by (kind) (rate(pallas_llm_tokens_total{instance=~\"$pallas_inst\", agent=~\"$agent\"}[5m]))", "legendFormat": "{{kind}}"} ], "fieldConfig": {"defaults": {"unit": "short", "custom": {"drawStyle": "line", "fillOpacity": 20, "stacking": {"mode": "normal"}}}}, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}} }, { "id": 422, "type": "table", "title": "Top-burning agents (24h, input+output tokens)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 8, "w": 12, "x": 0, "y": 84}, "targets": [ {"refId": "A", "expr": "topk(10, sum by (agent, model, instance) (increase(pallas_llm_tokens_total{kind=~\"input|output\", instance=~\"$pallas_inst\", agent=~\"$agent\"}[24h])))", "legendFormat": "", "format": "table", "instant": true} ], "fieldConfig": {"defaults": {"unit": "short"}, "overrides": [{"matcher": {"id": "byName", "options": "Value"}, "properties": [{"id": "displayName", "value": "tokens (24h)"}, {"id": "custom.cellOptions", "value": {"type": "gauge", "mode": "gradient"}}]}]}, "options": {"showHeader": true, "sortBy": [{"displayName": "tokens (24h)", "desc": true}]}, "transformations": [{"id": "organize", 
"options": {"excludeByName": {"Time": true, "__name__": true, "job": true}}}] }, { "id": 423, "type": "stat", "title": "Cache effectiveness (1h)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 6, "x": 12, "y": 84}, "targets": [ {"refId": "A", "expr": "sum(rate(pallas_llm_tokens_total{kind=\"cache_read\", instance=~\"$pallas_inst\"}[1h])) / clamp_min(sum(rate(pallas_llm_tokens_total{kind=~\"input|cache_read\", instance=~\"$pallas_inst\"}[1h])), 0.0001)", "legendFormat": "cache hit"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value", "graphMode": "area"}, "fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "orange", "value": 0.2}, {"color": "green", "value": 0.5}]}}} }, { "id": 424, "type": "stat", "title": "LLM turns/sec (5m)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 6, "x": 18, "y": 84}, "targets": [ {"refId": "A", "expr": "sum(rate(pallas_llm_turns_total{instance=~\"$pallas_inst\", agent=~\"$agent\"}[5m]))", "legendFormat": "turns/s"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value", "graphMode": "area"}, "fieldConfig": {"defaults": {"unit": "ops"}} }, { "id": 430, "type": "timeseries", "title": "Cypher tool calls — rate by outcome (Pallas → ariel)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 8, "w": 12, "x": 0, "y": 92}, "targets": [ {"refId": "A", "expr": "sum by (outcome) (rate(pallas_tool_calls_total{server=\"neo4j_cypher\", instance=~\"$pallas_inst\", agent=~\"$agent\"}[5m]))", "legendFormat": "{{outcome}}"} ], "fieldConfig": {"defaults": {"unit": "ops"}}, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}} }, { "id": 431, "type": "timeseries", "title": "Cypher tool calls — p95 latency by agent", "datasource": {"type": "prometheus", "uid": 
"${prom}"}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 92}, "targets": [ {"refId": "A", "expr": "histogram_quantile(0.95, sum by (le, agent, instance) (rate(pallas_tool_call_duration_seconds_bucket{server=\"neo4j_cypher\", instance=~\"$pallas_inst\", agent=~\"$agent\"}[5m])))", "legendFormat": "{{instance}}/{{agent}}"} ], "fieldConfig": {"defaults": {"unit": "s"}}, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}} }, { "id": 440, "type": "timeseries", "title": "Neo4j @ ariel — transactions (rate / open)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 8, "w": 12, "x": 0, "y": 100}, "targets": [ {"refId": "A", "expr": "rate(neo4j_monitor_tx_totalOpenedTx{instance=~\"ariel.*\"}[5m])", "legendFormat": "{{instance}} open rate"}, {"refId": "B", "expr": "neo4j_monitor_tx_currentOpenedTx{instance=~\"ariel.*\"}", "legendFormat": "{{instance}} current open"} ], "fieldConfig": {"defaults": {"unit": "short"}}, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}} }, { "id": 441, "type": "stat", "title": "Neo4j @ ariel — rollback ratio (10m)", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 6, "x": 12, "y": 100}, "targets": [ {"refId": "A", "expr": "rate(neo4j_monitor_tx_rolledBackTx{instance=~\"ariel.*\"}[10m]) / clamp_min(rate(neo4j_monitor_tx_totalOpenedTx{instance=~\"ariel.*\"}[10m]), 0.0001)", "legendFormat": "{{instance}}"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "value_and_name"}, "fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 0.05}, {"color": "red", "value": 0.10}]}}} }, { "id": 442, "type": "stat", "title": "Neo4j @ ariel — store size", "datasource": {"type": "prometheus", "uid": "${prom}"}, "gridPos": {"h": 4, "w": 6, "x": 18, "y": 100}, "targets": [ {"refId": "A", "expr": 
"neo4j_monitor_store_totalStoreSize{instance=~\"ariel.*\"}", "legendFormat": "{{instance}}"} ], "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "value_and_name"}, "fieldConfig": {"defaults": {"unit": "bytes"}} } ] }