feat(ansible): standardize Neo4j ports and add monitoring
- Unify Neo4j HTTP/Bolt/syslog ports across ariel and umbriel hosts
- Add neo4j_metrics_port (22094) for APOC exporter sidecar
- Add umbriel to Prometheus node_exporter targets
- Add Neo4j scrape config and alerts for tx rollback rate and stalled store growth
- Replace kernos_harper MCP with andromeda (caliban.helu.ca)
- Remove angelia MCP from kottos fastagent config
- Switch neo4j group membership from keeper_user to ponos
This commit is contained in:
351
dashboards/neo4j.json
Normal file
351
dashboards/neo4j.json
Normal file
@@ -0,0 +1,351 @@
|
||||
{
|
||||
"title": "Neo4j",
|
||||
"uid": "neo4j",
|
||||
"tags": ["neo4j", "graph"],
|
||||
"timezone": "browser",
|
||||
"schemaVersion": 39,
|
||||
"version": 1,
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"weekStart": "",
|
||||
"refresh": "30s",
|
||||
"time": {"from": "now-1h", "to": "now"},
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"name": "loki",
|
||||
"type": "datasource",
|
||||
"query": "loki",
|
||||
"current": {"selected": false, "text": "Loki", "value": "Loki"},
|
||||
"hide": 0,
|
||||
"label": "Loki datasource"
|
||||
},
|
||||
{
|
||||
"name": "prom",
|
||||
"type": "datasource",
|
||||
"query": "prometheus",
|
||||
"current": {"selected": false, "text": "Prometheus", "value": "Prometheus"},
|
||||
"hide": 0,
|
||||
"label": "Prometheus datasource"
|
||||
},
|
||||
{
|
||||
"name": "instance",
|
||||
"type": "query",
|
||||
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||
"query": "label_values(up{job=\"neo4j\"}, instance)",
|
||||
"refresh": 1,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"current": {"selected": true, "text": "All", "value": "$__all"},
|
||||
"label": "Instance"
|
||||
}
|
||||
]
|
||||
},
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"type": "row",
|
||||
"title": "Overview",
|
||||
"collapsed": false,
|
||||
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"type": "stat",
|
||||
"title": "Exporter up",
|
||||
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||
"gridPos": {"h": 4, "w": 6, "x": 0, "y": 1},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "up{job=\"neo4j\", instance=~\"$instance\"}",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value_and_name"},
|
||||
"fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"type": "stat",
|
||||
"title": "Nodes",
|
||||
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||
"gridPos": {"h": 4, "w": 6, "x": 6, "y": 1},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "neo4j_monitor_ids_nodeIds{instance=~\"$instance\"}",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "value_and_name"},
|
||||
"fieldConfig": {"defaults": {"unit": "short"}}
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"type": "stat",
|
||||
"title": "Relationships",
|
||||
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||
"gridPos": {"h": 4, "w": 6, "x": 12, "y": 1},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "neo4j_monitor_ids_relIds{instance=~\"$instance\"}",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "value_and_name"},
|
||||
"fieldConfig": {"defaults": {"unit": "short"}}
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"type": "stat",
|
||||
"title": "Total store size",
|
||||
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||
"gridPos": {"h": 4, "w": 6, "x": 18, "y": 1},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "neo4j_monitor_store_totalStoreSize{instance=~\"$instance\"}",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "value_and_name"},
|
||||
"fieldConfig": {"defaults": {"unit": "bytes"}}
|
||||
},
|
||||
|
||||
{
|
||||
"id": 10,
|
||||
"type": "row",
|
||||
"title": "Transactions",
|
||||
"collapsed": false,
|
||||
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 5}
|
||||
},
|
||||
{
|
||||
"id": 11,
|
||||
"type": "timeseries",
|
||||
"title": "Transaction open rate (per second)",
|
||||
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 6},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "rate(neo4j_monitor_tx_totalOpenedTx{instance=~\"$instance\"}[5m])",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {"defaults": {"unit": "ops"}},
|
||||
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
|
||||
},
|
||||
{
|
||||
"id": 12,
|
||||
"type": "timeseries",
|
||||
"title": "Currently open transactions",
|
||||
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 6},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "neo4j_monitor_tx_currentOpenedTx{instance=~\"$instance\"}",
|
||||
"legendFormat": "{{instance}} current"
|
||||
},
|
||||
{
|
||||
"refId": "B",
|
||||
"expr": "neo4j_monitor_tx_peakTx{instance=~\"$instance\"}",
|
||||
"legendFormat": "{{instance}} peak"
|
||||
}
|
||||
],
|
||||
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"type": "stat",
|
||||
"title": "Rollback ratio (10m)",
|
||||
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||
"gridPos": {"h": 4, "w": 12, "x": 0, "y": 14},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "rate(neo4j_monitor_tx_rolledBackTx{instance=~\"$instance\"}[10m]) / clamp_min(rate(neo4j_monitor_tx_totalOpenedTx{instance=~\"$instance\"}[10m]), 0.0001)",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "value_and_name"},
|
||||
"fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 0.05}, {"color": "red", "value": 0.10}]}}}
|
||||
},
|
||||
{
|
||||
"id": 14,
|
||||
"type": "stat",
|
||||
"title": "Last tx ID",
|
||||
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||
"gridPos": {"h": 4, "w": 12, "x": 12, "y": 14},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "neo4j_monitor_tx_lastTxId{instance=~\"$instance\"}",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "value_and_name"},
|
||||
"fieldConfig": {"defaults": {"unit": "short"}}
|
||||
},
|
||||
|
||||
{
|
||||
"id": 20,
|
||||
"type": "row",
|
||||
"title": "Store breakdown",
|
||||
"collapsed": false,
|
||||
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 18}
|
||||
},
|
||||
{
|
||||
"id": 21,
|
||||
"type": "timeseries",
|
||||
"title": "Store size by component",
|
||||
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 19},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "neo4j_monitor_store_nodeStoreSize{instance=~\"$instance\"}",
|
||||
"legendFormat": "{{instance}} nodes"
|
||||
},
|
||||
{
|
||||
"refId": "B",
|
||||
"expr": "neo4j_monitor_store_relStoreSize{instance=~\"$instance\"}",
|
||||
"legendFormat": "{{instance}} rels"
|
||||
},
|
||||
{
|
||||
"refId": "C",
|
||||
"expr": "neo4j_monitor_store_propStoreSize{instance=~\"$instance\"}",
|
||||
"legendFormat": "{{instance}} props"
|
||||
},
|
||||
{
|
||||
"refId": "D",
|
||||
"expr": "neo4j_monitor_store_stringStoreSize{instance=~\"$instance\"}",
|
||||
"legendFormat": "{{instance}} strings"
|
||||
},
|
||||
{
|
||||
"refId": "E",
|
||||
"expr": "neo4j_monitor_store_arrayStoreSize{instance=~\"$instance\"}",
|
||||
"legendFormat": "{{instance}} arrays"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {"defaults": {"unit": "bytes"}},
|
||||
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
|
||||
},
|
||||
{
|
||||
"id": 22,
|
||||
"type": "timeseries",
|
||||
"title": "Transaction log size",
|
||||
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 19},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "neo4j_monitor_store_logSize{instance=~\"$instance\"}",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {"defaults": {"unit": "bytes"}},
|
||||
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
|
||||
},
|
||||
|
||||
{
  "id": 30,
  "type": "row",
  "title": "Exporter JVM (sidecar health)",
  "collapsed": true,
  "gridPos": {"h": 1, "w": 24, "x": 0, "y": 27},
  "panels": [
    {
      "id": 31,
      "type": "timeseries",
      "title": "Exporter JVM heap used / max",
      "datasource": {"type": "prometheus", "uid": "${prom}"},
      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 28},
      "targets": [
        {
          "refId": "A",
          "expr": "jvm_memory_used_bytes{job=\"neo4j\", area=\"heap\", instance=~\"$instance\"}",
          "legendFormat": "{{instance}} used"
        },
        {
          "refId": "B",
          "expr": "jvm_memory_max_bytes{job=\"neo4j\", area=\"heap\", instance=~\"$instance\"}",
          "legendFormat": "{{instance}} max"
        }
      ],
      "fieldConfig": {"defaults": {"unit": "bytes"}},
      "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
    },
    {
      "id": 32,
      "type": "timeseries",
      "title": "Exporter GC time",
      "datasource": {"type": "prometheus", "uid": "${prom}"},
      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 28},
      "targets": [
        {
          "refId": "A",
          "expr": "rate(jvm_gc_collection_seconds_sum{job=\"neo4j\", instance=~\"$instance\"}[5m])",
          "legendFormat": "{{instance}} {{gc}}"
        }
      ],
      "fieldConfig": {"defaults": {"unit": "s"}},
      "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
    }
  ]
},
|
||||
|
||||
{
|
||||
"id": 40,
|
||||
"type": "row",
|
||||
"title": "Logs",
|
||||
"collapsed": false,
|
||||
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 36}
|
||||
},
|
||||
{
|
||||
"id": 41,
|
||||
"type": "timeseries",
|
||||
"title": "Neo4j log rate by host",
|
||||
"datasource": {"type": "loki", "uid": "${loki}"},
|
||||
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 37},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "sum by (hostname) (rate({job=\"neo4j\"}[5m]))",
|
||||
"legendFormat": "{{hostname}}"
|
||||
}
|
||||
],
|
||||
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
|
||||
},
|
||||
{
|
||||
"id": 42,
|
||||
"type": "logs",
|
||||
"title": "Neo4j — errors/warnings/exceptions (last 50 lines)",
|
||||
"datasource": {"type": "loki", "uid": "${loki}"},
|
||||
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 37},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "{job=\"neo4j\"} |~ \"(?i)error|warn|exception\"",
|
||||
"maxLines": 50
|
||||
}
|
||||
],
|
||||
"options": {"showLabels": true, "showTime": true, "wrapLogMessage": true}
|
||||
},
|
||||
{
|
||||
"id": 43,
|
||||
"type": "logs",
|
||||
"title": "Neo4j — all logs (latest 100 lines)",
|
||||
"datasource": {"type": "loki", "uid": "${loki}"},
|
||||
"gridPos": {"h": 10, "w": 24, "x": 0, "y": 45},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "{job=\"neo4j\"}",
|
||||
"maxLines": 100
|
||||
}
|
||||
],
|
||||
"options": {"showLabels": true, "showTime": true, "wrapLogMessage": true}
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user