{
  "title": "Puck Services — Logs & Health",
  "uid": "puck-services-logs",
  "tags": ["puck", "logs", "mnemosyne", "pallas", "daedalus"],
  "timezone": "browser",
  "schemaVersion": 39,
  "version": 1,
  "editable": true,
  "fiscalYearStartMonth": 0,
  "weekStart": "",
  "refresh": "30s",
  "time": { "from": "now-1h", "to": "now" },
  "templating": {
    "list": [
      {
        "name": "loki",
        "type": "datasource",
        "query": "loki",
        "current": { "selected": false, "text": "Loki", "value": "Loki" },
        "hide": 0,
        "label": "Loki datasource"
      },
      {
        "name": "prom",
        "type": "datasource",
        "query": "prometheus",
        "current": { "selected": false, "text": "Prometheus", "value": "Prometheus" },
        "hide": 0,
        "label": "Prometheus datasource"
      }
    ]
  },
  "panels": [
    {
      "id": 1,
      "type": "row",
      "title": "Mnemosyne",
      "collapsed": false,
      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }
    },
    {
      "id": 2,
      "type": "timeseries",
      "title": "Mnemosyne — log rate by level",
      "datasource": { "type": "loki", "uid": "${loki}" },
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 1 },
      "targets": [
        {
          "refId": "A",
          "expr": "sum by (level) (rate({service=\"mnemosyne\"} | json [5m]))",
          "legendFormat": "{{level}}"
        }
      ],
      "options": {
        "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }
      }
    },
    {
      "id": 3,
      "type": "logs",
      "title": "Mnemosyne — errors (last 25)",
      "datasource": { "type": "loki", "uid": "${loki}" },
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 1 },
      "targets": [
        {
          "refId": "A",
          "expr": "{service=\"mnemosyne\"} | json | level=\"ERROR\"",
          "maxLines": 25
        }
      ],
      "options": { "showLabels": false, "showTime": true, "wrapLogMessage": true }
    },
    {
      "id": 4,
      "type": "stat",
      "title": "Mnemosyne — HTTP 5xx rate",
      "datasource": { "type": "prometheus", "uid": "${prom}" },
      "gridPos": { "h": 4, "w": 8, "x": 0, "y": 9 },
      "targets": [
        {
          "refId": "A",
          "expr": "sum(rate(django_http_responses_total_by_status_total{job=\"mnemosyne\",status=~\"5..\"}[5m])) / clamp_min(sum(rate(django_http_responses_total_by_status_total{job=\"mnemosyne\"}[5m])), 0.0001)"
        }
      ],
      "options": {
        "reduceOptions": { "calcs": ["lastNotNull"] },
        "colorMode": "value",
        "textMode": "auto"
      },
      "fieldConfig": {
        "defaults": {
          "unit": "percentunit",
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green" },
              { "color": "orange", "value": 0.01 },
              { "color": "red", "value": 0.05 }
            ]
          }
        }
      }
    },
    {
      "id": 5,
      "type": "stat",
      "title": "Mnemosyne — p95 latency",
      "datasource": { "type": "prometheus", "uid": "${prom}" },
      "gridPos": { "h": 4, "w": 8, "x": 8, "y": 9 },
      "targets": [
        {
          "refId": "A",
          "expr": "histogram_quantile(0.95, sum by (le) (rate(django_http_requests_latency_including_middlewares_seconds_bucket{job=\"mnemosyne\"}[5m])))"
        }
      ],
      "options": {
        "reduceOptions": { "calcs": ["lastNotNull"] }
      },
      "fieldConfig": {
        "defaults": {
          "unit": "s",
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green" },
              { "color": "orange", "value": 1 },
              { "color": "red", "value": 5 }
            ]
          }
        }
      }
    },
    {
      "id": 6,
      "type": "stat",
      "title": "Mnemosyne — MCP tool error rate",
      "datasource": { "type": "prometheus", "uid": "${prom}" },
      "gridPos": { "h": 4, "w": 8, "x": 16, "y": 9 },
      "targets": [
        {
          "refId": "A",
          "expr": "sum(rate(mcp_tool_invocations_total{job=\"mnemosyne\",status=\"error\"}[5m])) / clamp_min(sum(rate(mcp_tool_invocations_total{job=\"mnemosyne\"}[5m])), 0.0001)"
        }
      ],
      "options": {
        "reduceOptions": { "calcs": ["lastNotNull"] }
      },
      "fieldConfig": {
        "defaults": {
          "unit": "percentunit",
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green" },
              { "color": "orange", "value": 0.05 },
              { "color": "red", "value": 0.10 }
            ]
          }
        }
      }
    },
    {
      "id": 10,
      "type": "row",
      "title": "Pallas (Kottos agents)",
      "collapsed": false,
      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 13 }
    },
    {
      "id": 11,
      "type": "timeseries",
      "title": "Pallas — log rate by agent (component)",
      "datasource": { "type": "loki", "uid": "${loki}" },
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 14 },
      "targets": [
        {
          "refId": "A",
          "expr": "sum by (component) (rate({service=\"pallas\", project=\"kottos\"} | json [5m]))",
          "legendFormat": "{{component}}"
        }
      ],
      "options": {
        "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }
      }
    },
    {
      "id": 12,
      "type": "logs",
      "title": "Pallas — forward trace errors (opaque MCP transport failures)",
      "datasource": { "type": "loki", "uid": "${loki}" },
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 14 },
      "targets": [
        {
          "refId": "A",
          "expr": "{service=\"pallas\", project=\"kottos\"} |= \"pallas.forward.trace\" | json | level=~\"ERROR|WARNING\"",
          "maxLines": 25
        }
      ],
      "options": { "showLabels": false, "showTime": true, "wrapLogMessage": true }
    },
    {
      "id": 13,
      "type": "logs",
      "title": "Pallas — last 25 ERROR lines (any agent)",
      "datasource": { "type": "loki", "uid": "${loki}" },
      "gridPos": { "h": 8, "w": 24, "x": 0, "y": 22 },
      "targets": [
        {
          "refId": "A",
          "expr": "{service=\"pallas\", project=\"kottos\"} | json | level=\"ERROR\"",
          "maxLines": 25
        }
      ],
      "options": { "showLabels": true, "showTime": true, "wrapLogMessage": true }
    },
    {
      "id": 20,
      "type": "row",
      "title": "Daedalus",
      "collapsed": false,
      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 30 }
    },
    {
      "id": 21,
      "type": "timeseries",
      "title": "Daedalus — log rate by level",
      "datasource": { "type": "loki", "uid": "${loki}" },
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 31 },
      "targets": [
        {
          "refId": "A",
          "expr": "sum by (level) (rate({service=\"daedalus\"} | json [5m]))",
          "legendFormat": "{{level}}"
        }
      ],
      "options": {
        "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }
      }
    },
    {
      "id": 22,
      "type": "stat",
      "title": "Daedalus — HTTP 5xx rate",
      "datasource": { "type": "prometheus", "uid": "${prom}" },
      "gridPos": { "h": 4, "w": 6, "x": 12, "y": 31 },
      "targets": [
        {
          "refId": "A",
          "expr": "sum(rate(daedalus_http_requests_total{status=~\"5..\"}[5m])) / clamp_min(sum(rate(daedalus_http_requests_total[5m])), 0.0001)"
        }
      ],
      "options": {
        "reduceOptions": { "calcs": ["lastNotNull"] }
      },
      "fieldConfig": {
        "defaults": {
          "unit": "percentunit",
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green" },
              { "color": "orange", "value": 0.01 },
              { "color": "red", "value": 0.05 }
            ]
          }
        }
      }
    },
    {
      "id": 23,
      "type": "stat",
      "title": "Daedalus — MCP p95 latency",
      "datasource": { "type": "prometheus", "uid": "${prom}" },
      "gridPos": { "h": 4, "w": 6, "x": 18, "y": 31 },
      "targets": [
        {
          "refId": "A",
          "expr": "histogram_quantile(0.95, sum by (le) (rate(daedalus_mcp_request_duration_seconds_bucket[5m])))"
        }
      ],
      "options": {
        "reduceOptions": { "calcs": ["lastNotNull"] }
      },
      "fieldConfig": {
        "defaults": {
          "unit": "s",
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green" },
              { "color": "orange", "value": 5 },
              { "color": "red", "value": 30 }
            ]
          }
        }
      }
    },
    {
      "id": 24,
      "type": "logs",
      "title": "Daedalus — errors (last 25)",
      "datasource": { "type": "loki", "uid": "${loki}" },
      "gridPos": { "h": 8, "w": 24, "x": 0, "y": 39 },
      "targets": [
        {
          "refId": "A",
          "expr": "{service=\"daedalus\"} | json | level=\"ERROR\"",
          "maxLines": 25
        }
      ],
      "options": { "showLabels": false, "showTime": true, "wrapLogMessage": true }
    }
  ]
}