feat(alloy): add journal relabeling and kottos integration on puck

Introduce structured journal relabel rules on puck to tag Pallas-managed
units with {service, project, component} labels matching the Mnemosyne
and Daedalus schema. Add kottos release variable and vault secrets
example entries for the new Pallas FastAgent runtime.

Remove the defunct mnemosyne syslog listener now that Mnemosyne ships
JSON logs via the docker-socket pipeline.
This commit is contained in:
2026-05-11 13:54:14 -04:00
parent e92ab80bbf
commit 8c95173705
19 changed files with 1336 additions and 27 deletions

View File

@@ -0,0 +1,242 @@
{
"title": "Puck Services — Logs & Health",
"uid": "puck-services-logs",
"tags": ["puck", "logs", "mnemosyne", "pallas", "daedalus"],
"timezone": "browser",
"schemaVersion": 39,
"version": 1,
"editable": true,
"fiscalYearStartMonth": 0,
"weekStart": "",
"refresh": "30s",
"time": {"from": "now-1h", "to": "now"},
"templating": {
"list": [
{
"name": "loki",
"type": "datasource",
"query": "loki",
"current": {"selected": false, "text": "Loki", "value": "Loki"},
"hide": 0,
"label": "Loki datasource"
},
{
"name": "prom",
"type": "datasource",
"query": "prometheus",
"current": {"selected": false, "text": "Prometheus", "value": "Prometheus"},
"hide": 0,
"label": "Prometheus datasource"
}
]
},
"panels": [
{
"id": 1,
"type": "row",
"title": "Mnemosyne",
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}
},
{
"id": 2,
"type": "timeseries",
"title": "Mnemosyne — log rate by level",
"datasource": {"type": "loki", "uid": "${loki}"},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 1},
"targets": [
{
"refId": "A",
"expr": "sum by (level) (rate({service=\"mnemosyne\"} | json [5m]))",
"legendFormat": "{{level}}"
}
],
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 3,
"type": "logs",
"title": "Mnemosyne — errors (last 25)",
"datasource": {"type": "loki", "uid": "${loki}"},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 1},
"targets": [
{
"refId": "A",
"expr": "{service=\"mnemosyne\"} | json | level=\"ERROR\"",
"maxLines": 25
}
],
"options": {"showLabels": false, "showTime": true, "wrapLogMessage": true}
},
{
"id": 4,
"type": "stat",
"title": "Mnemosyne — HTTP 5xx rate",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 8, "x": 0, "y": 9},
"targets": [
{
"refId": "A",
"expr": "(sum(rate(django_http_responses_total_by_status_total{job=\"mnemosyne\",status=~\"5..\"}[5m])) or vector(0)) / clamp_min(sum(rate(django_http_responses_total_by_status_total{job=\"mnemosyne\"}[5m])), 0.0001)"
}
],
"options": {
"reduceOptions": {"calcs": ["lastNotNull"]},
"colorMode": "value",
"textMode": "auto"
},
"fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 0.01}, {"color": "red", "value": 0.05}]}}}
},
{
"id": 5,
"type": "stat",
"title": "Mnemosyne — p95 latency",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 8, "x": 8, "y": 9},
"targets": [
{
"refId": "A",
"expr": "histogram_quantile(0.95, sum by (le) (rate(django_http_requests_latency_including_middlewares_seconds_bucket{job=\"mnemosyne\"}[5m])))"
}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}},
"fieldConfig": {"defaults": {"unit": "s", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 1}, {"color": "red", "value": 5}]}}}
},
{
"id": 6,
"type": "stat",
"title": "Mnemosyne — MCP tool error rate",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 8, "x": 16, "y": 9},
"targets": [
{
"refId": "A",
"expr": "(sum(rate(mcp_tool_invocations_total{job=\"mnemosyne\",status=\"error\"}[5m])) or vector(0)) / clamp_min(sum(rate(mcp_tool_invocations_total{job=\"mnemosyne\"}[5m])), 0.0001)"
}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}},
"fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 0.05}, {"color": "red", "value": 0.10}]}}}
},
{
"id": 10,
"type": "row",
"title": "Pallas (Kottos agents)",
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 13}
},
{
"id": 11,
"type": "timeseries",
"title": "Pallas — log rate by agent (component)",
"datasource": {"type": "loki", "uid": "${loki}"},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 14},
"targets": [
{
"refId": "A",
"expr": "sum by (component) (rate({service=\"pallas\", project=\"kottos\"} | json [5m]))",
"legendFormat": "{{component}}"
}
],
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 12,
"type": "logs",
"title": "Pallas — forward trace errors (opaque MCP transport failures)",
"datasource": {"type": "loki", "uid": "${loki}"},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 14},
"targets": [
{
"refId": "A",
"expr": "{service=\"pallas\", project=\"kottos\"} |= \"pallas.forward.trace\" | json | level=~\"ERROR|WARNING\"",
"maxLines": 25
}
],
"options": {"showLabels": false, "showTime": true, "wrapLogMessage": true}
},
{
"id": 13,
"type": "logs",
"title": "Pallas — last 25 ERROR lines (any agent)",
"datasource": {"type": "loki", "uid": "${loki}"},
"gridPos": {"h": 8, "w": 24, "x": 0, "y": 22},
"targets": [
{
"refId": "A",
"expr": "{service=\"pallas\", project=\"kottos\"} | json | level=\"ERROR\"",
"maxLines": 25
}
],
"options": {"showLabels": true, "showTime": true, "wrapLogMessage": true}
},
{
"id": 20,
"type": "row",
"title": "Daedalus",
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 30}
},
{
"id": 21,
"type": "timeseries",
"title": "Daedalus — log rate by level",
"datasource": {"type": "loki", "uid": "${loki}"},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 31},
"targets": [
{
"refId": "A",
"expr": "sum by (level) (rate({service=\"daedalus\"} | json [5m]))",
"legendFormat": "{{level}}"
}
],
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
},
{
"id": 22,
"type": "stat",
"title": "Daedalus — HTTP 5xx rate",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 6, "x": 12, "y": 31},
"targets": [
{
"refId": "A",
"expr": "(sum(rate(daedalus_http_requests_total{status=~\"5..\"}[5m])) or vector(0)) / clamp_min(sum(rate(daedalus_http_requests_total[5m])), 0.0001)"
}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}},
"fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 0.01}, {"color": "red", "value": 0.05}]}}}
},
{
"id": 23,
"type": "stat",
"title": "Daedalus — MCP p95 latency",
"datasource": {"type": "prometheus", "uid": "${prom}"},
"gridPos": {"h": 4, "w": 6, "x": 18, "y": 31},
"targets": [
{
"refId": "A",
"expr": "histogram_quantile(0.95, sum by (le) (rate(daedalus_mcp_request_duration_seconds_bucket[5m])))"
}
],
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}},
"fieldConfig": {"defaults": {"unit": "s", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 5}, {"color": "red", "value": 30}]}}}
},
{
"id": 24,
"type": "logs",
"title": "Daedalus — errors (last 25)",
"datasource": {"type": "loki", "uid": "${loki}"},
"gridPos": {"h": 8, "w": 24, "x": 0, "y": 39},
"targets": [
{
"refId": "A",
"expr": "{service=\"daedalus\"} | json | level=\"ERROR\"",
"maxLines": 25
}
],
"options": {"showLabels": false, "showTime": true, "wrapLogMessage": true}
}
]
}