feat(ansible): standardize Neo4j ports and add monitoring
- Unify Neo4j HTTP/Bolt/syslog ports across ariel and umbriel hosts
- Add neo4j_metrics_port (22094) for APOC exporter sidecar
- Add umbriel to Prometheus node_exporter targets
- Add Neo4j scrape config and alerts for tx rollback rate and stalled store growth
- Replace kernos_harper MCP with andromeda (caliban.helu.ca)
- Remove angelia MCP from kottos fastagent config
- Switch neo4j group membership from keeper_user to ponos
This commit is contained in:
@@ -9,7 +9,7 @@ services:
|
|||||||
|
|
||||||
# Alloy
|
# Alloy
|
||||||
alloy_log_level: "warn"
|
alloy_log_level: "warn"
|
||||||
neo4j_syslog_port: 22011
|
neo4j_syslog_port: 51414
|
||||||
|
|
||||||
# Neo4j
|
# Neo4j
|
||||||
neo4j_user: neo4j
|
neo4j_user: neo4j
|
||||||
@@ -17,6 +17,7 @@ neo4j_group: neo4j
|
|||||||
neo4j_directory: /srv/neo4j
|
neo4j_directory: /srv/neo4j
|
||||||
neo4j_auth_user: neo4j
|
neo4j_auth_user: neo4j
|
||||||
neo4j_auth_password: "{{ vault_neo4j_auth_password }}"
|
neo4j_auth_password: "{{ vault_neo4j_auth_password }}"
|
||||||
neo4j_http_port: 25554
|
neo4j_http_port: 22084
|
||||||
neo4j_bolt_port: 7687
|
neo4j_bolt_port: 22074
|
||||||
|
neo4j_metrics_port: 22094
|
||||||
neo4j_apoc_unrestricted: "apoc.*"
|
neo4j_apoc_unrestricted: "apoc.*"
|
||||||
|
|||||||
@@ -72,6 +72,7 @@ prometheus_targets:
|
|||||||
- 'sycorax.incus:9100'
|
- 'sycorax.incus:9100'
|
||||||
- 'prospero.incus:9100'
|
- 'prospero.incus:9100'
|
||||||
- 'rosalind.incus:9100'
|
- 'rosalind.incus:9100'
|
||||||
|
- 'umbriel.incus:9100'
|
||||||
|
|
||||||
# Prometheus OAuth2-Proxy Sidecar
|
# Prometheus OAuth2-Proxy Sidecar
|
||||||
prometheus_proxy_port: 9091
|
prometheus_proxy_port: 9091
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ services:
|
|||||||
|
|
||||||
# Alloy
|
# Alloy
|
||||||
alloy_log_level: "warn"
|
alloy_log_level: "warn"
|
||||||
neo4j_syslog_port: 22012
|
neo4j_syslog_port: 51414
|
||||||
|
|
||||||
# Neo4j
|
# Neo4j
|
||||||
neo4j_user: neo4j
|
neo4j_user: neo4j
|
||||||
@@ -21,6 +21,7 @@ neo4j_group: neo4j
|
|||||||
neo4j_directory: /srv/neo4j
|
neo4j_directory: /srv/neo4j
|
||||||
neo4j_auth_user: neo4j
|
neo4j_auth_user: neo4j
|
||||||
neo4j_auth_password: "{{ vault_mnemosyne_neo4j_auth_password }}"
|
neo4j_auth_password: "{{ vault_mnemosyne_neo4j_auth_password }}"
|
||||||
neo4j_http_port: 25555
|
neo4j_http_port: 22084
|
||||||
neo4j_bolt_port: 7687
|
neo4j_bolt_port: 22074
|
||||||
|
neo4j_metrics_port: 22094
|
||||||
neo4j_apoc_unrestricted: "apoc.*"
|
neo4j_apoc_unrestricted: "apoc.*"
|
||||||
|
|||||||
@@ -54,17 +54,11 @@ mcp:
|
|||||||
url: "{{ kottos_grafana_url | default('http://miranda.incus:25533/mcp') }}"
|
url: "{{ kottos_grafana_url | default('http://miranda.incus:25533/mcp') }}"
|
||||||
|
|
||||||
# ── Shell + file operations — Kernos (Korax) ─────────────────────────────
|
# ── Shell + file operations — Kernos (Korax) ─────────────────────────────
|
||||||
kernos_harper:
|
andromeda:
|
||||||
transport: http
|
transport: http
|
||||||
url: "{{ kottos_kernos_harper_url | default('http://korax.helu.ca:20261/mcp') }}"
|
url: "{{ kottos_kernos_harper_url | default('http://caliban.helu.ca:20261/mcp') }}"
|
||||||
load_on_start: false
|
load_on_start: false
|
||||||
|
|
||||||
# ── Angelia messaging ───────────────────────────────────────────────────
|
|
||||||
# Auth header provided by fastagent.secrets.yaml (vault-rendered).
|
|
||||||
angelia:
|
|
||||||
transport: http
|
|
||||||
url: "{{ kottos_angelia_url | default('https://ouranos.helu.ca/mcp/') }}"
|
|
||||||
|
|
||||||
# ── GitHub MCP Server (local Docker, stdio) ──────────────────────────────
|
# ── GitHub MCP Server (local Docker, stdio) ──────────────────────────────
|
||||||
# GITHUB_PERSONAL_ACCESS_TOKEN provided by fastagent.secrets.yaml
|
# GITHUB_PERSONAL_ACCESS_TOKEN provided by fastagent.secrets.yaml
|
||||||
github:
|
github:
|
||||||
|
|||||||
@@ -24,9 +24,9 @@
|
|||||||
group: "{{neo4j_group}}"
|
group: "{{neo4j_group}}"
|
||||||
system: true
|
system: true
|
||||||
|
|
||||||
- name: Add group neo4j to keeper_user
|
- name: Add group neo4j to user ponos
|
||||||
ansible.builtin.user:
|
ansible.builtin.user:
|
||||||
name: "{{keeper_user}}"
|
name: ponos
|
||||||
groups: "{{neo4j_group}}"
|
groups: "{{neo4j_group}}"
|
||||||
append: true
|
append: true
|
||||||
|
|
||||||
@@ -38,6 +38,14 @@
|
|||||||
state: directory
|
state: directory
|
||||||
mode: '750'
|
mode: '750'
|
||||||
|
|
||||||
|
- name: Create neo4j data directory
|
||||||
|
ansible.builtin.file:
|
||||||
|
path: "{{neo4j_directory}}/data"
|
||||||
|
owner: "{{neo4j_user}}"
|
||||||
|
group: "{{neo4j_group}}"
|
||||||
|
state: directory
|
||||||
|
mode: '750'
|
||||||
|
|
||||||
- name: Template docker-compose file
|
- name: Template docker-compose file
|
||||||
ansible.builtin.template:
|
ansible.builtin.template:
|
||||||
src: docker-compose.yml.j2
|
src: docker-compose.yml.j2
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
services:
|
services:
|
||||||
neo4j:
|
neo4j:
|
||||||
image: neo4j:{{neo4j_image_version}}
|
image: neo4j:{{neo4j_version}}
|
||||||
|
pull_policy: always
|
||||||
container_name: neo4j
|
container_name: neo4j
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
ports:
|
ports:
|
||||||
@@ -11,9 +12,11 @@ services:
|
|||||||
- neo4j_logs:/logs
|
- neo4j_logs:/logs
|
||||||
- neo4j_plugins:/plugins
|
- neo4j_plugins:/plugins
|
||||||
environment:
|
environment:
|
||||||
NEO4J_AUTH: "{{neo4j_auth_user}}/{{neo4j_auth_password}}"
|
NEO4J_AUTH: "{{neo4j_user}}/{{neo4j_password}}"
|
||||||
# APOC Plugin
|
# APOC Plugin — core ("apoc") is required by apoc-extended.
|
||||||
NEO4J_PLUGINS: '["apoc"]'
|
# Listing only apoc-extended fails to expose apoc.version(),
|
||||||
|
# apoc.coll.*, apoc.date.* — declare both.
|
||||||
|
NEO4J_PLUGINS: '["apoc", "apoc-extended"]'
|
||||||
NEO4J_apoc_export_file_enabled: "true"
|
NEO4J_apoc_export_file_enabled: "true"
|
||||||
NEO4J_apoc_import_file_enabled: "true"
|
NEO4J_apoc_import_file_enabled: "true"
|
||||||
NEO4J_apoc_import_file_use__neo4j__config: "true"
|
NEO4J_apoc_import_file_use__neo4j__config: "true"
|
||||||
@@ -25,7 +28,31 @@ services:
|
|||||||
syslog-format: "{{syslog_format}}"
|
syslog-format: "{{syslog_format}}"
|
||||||
tag: "neo4j"
|
tag: "neo4j"
|
||||||
|
|
||||||
|
neo4j-exporter:
|
||||||
|
image: stscoundrel/neo4j-apoc-exporter:v0.1.0
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- "{{neo4j_metrics_port}}:17687"
|
||||||
|
environment:
|
||||||
|
- NEO4J_URI=bolt://neo4j:7687
|
||||||
|
- NEO4J_USER={{neo4j_user}}
|
||||||
|
- NEO4J_PASSWORD={{neo4j_password}}
|
||||||
|
- EXPORTER_PORT=17687
|
||||||
|
depends_on:
|
||||||
|
- neo4j
|
||||||
|
logging:
|
||||||
|
driver: syslog
|
||||||
|
options:
|
||||||
|
syslog-address: "tcp://127.0.0.1:{{neo4j_syslog_port}}"
|
||||||
|
syslog-format: "{{syslog_format}}"
|
||||||
|
tag: "neo4j-exporter"
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
neo4j_data:
|
neo4j_data:
|
||||||
|
driver: local
|
||||||
|
driver_opts:
|
||||||
|
type: none
|
||||||
|
device: {{neo4j_directory}}/data
|
||||||
|
o: bind
|
||||||
neo4j_logs:
|
neo4j_logs:
|
||||||
neo4j_plugins:
|
neo4j_plugins:
|
||||||
@@ -384,6 +384,48 @@ groups:
|
|||||||
summary: "Mnemosyne Celery backlog on {{ $labels.queue }}"
|
summary: "Mnemosyne Celery backlog on {{ $labels.queue }}"
|
||||||
description: "Celery queue '{{ $labels.queue }}' has {{ $value }} pending tasks for more than 10 minutes — check the worker logs in Loki ({service=\"mnemosyne\", component=\"worker\"})."
|
description: "Celery queue '{{ $labels.queue }}' has {{ $value }} pending tasks for more than 10 minutes — check the worker logs in Loki ({service=\"mnemosyne\", component=\"worker\"})."
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Neo4j Alerts (neo4j-apoc-exporter sidecar)
|
||||||
|
# ============================================================================
|
||||||
|
# Metrics come from stscoundrel/neo4j-apoc-exporter, which connects to
|
||||||
|
# Neo4j over Bolt and surfaces apoc.monitor.* gauges plus standard JVM
|
||||||
|
# metrics. "Exporter down" therefore covers both "exporter container
|
||||||
|
# crashed" and "exporter cannot reach Bolt" — either way Neo4j is
|
||||||
|
# effectively unobservable. Hostname-only — purpose of each instance
|
||||||
|
# is implied by the host (e.g. ariel = LLM memory, umbriel = Mnemosyne).
|
||||||
|
- name: neo4j_alerts
|
||||||
|
rules:
|
||||||
|
- alert: Neo4jExporterDown
|
||||||
|
expr: up{job="neo4j"} == 0
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: "Neo4j exporter down on {{ $labels.instance }}"
|
||||||
|
description: "The neo4j-apoc-exporter on {{ $labels.instance }} has been unreachable for more than 5 minutes. Either the sidecar container is down or it cannot connect to Neo4j over Bolt — check `docker ps` and `docker logs neo4j-exporter` on the host."
|
||||||
|
|
||||||
|
- alert: Neo4jHighRollbackRate
|
||||||
|
expr: |
|
||||||
|
rate(neo4j_monitor_tx_rolledBackTx[10m])
|
||||||
|
/ clamp_min(rate(neo4j_monitor_tx_totalOpenedTx[10m]), 1) > 0.10
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Neo4j transaction rollback rate above 10% on {{ $labels.instance }}"
|
||||||
|
description: "More than 10% of transactions on {{ $labels.instance }} have rolled back over the last 10 minutes — check application logs in Loki ({job=\"neo4j\", hostname=\"{{ $labels.instance }}\"})."
|
||||||
|
|
||||||
|
- alert: Neo4jStoreGrowthStalled
|
||||||
|
expr: |
|
||||||
|
rate(neo4j_monitor_tx_totalOpenedTx[15m]) == 0
|
||||||
|
and neo4j_monitor_tx_currentOpenedTx > 0
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Neo4j has open transactions but zero throughput on {{ $labels.instance }}"
|
||||||
|
description: "{{ $labels.instance }} shows {{ $value }} currently-open transactions but no new transactions opened in 15 minutes — possible Bolt-side hang or stuck query."
|
||||||
|
|
||||||
# Red Panda Seal of Approval 🐼
|
# Red Panda Seal of Approval 🐼
|
||||||
# "If the metrics aren't red, go back to bed"
|
# "If the metrics aren't red, go back to bed"
|
||||||
{% endraw %}
|
{% endraw %}
|
||||||
|
|||||||
@@ -62,4 +62,17 @@ scrape_configs:
|
|||||||
metrics_path: '/metrics'
|
metrics_path: '/metrics'
|
||||||
scrape_interval: 15s
|
scrape_interval: 15s
|
||||||
|
|
||||||
|
# Neo4j — stscoundrel/neo4j-apoc-exporter sidecar connects to the local
|
||||||
|
# Neo4j over Bolt and exposes apoc.monitor.* (tx/ids/store) plus JVM
|
||||||
|
# metrics on the standard metrics port (22094). Both Ariel (LLM memory
|
||||||
|
# via neo4j-cypher MCP) and Umbriel (Mnemosyne graph+vector DB) use the
|
||||||
|
# same port — they are differentiated by hostname only.
|
||||||
|
- job_name: 'neo4j'
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- 'ariel.incus:22094'
|
||||||
|
- 'umbriel.incus:22094'
|
||||||
|
metrics_path: '/metrics'
|
||||||
|
scrape_interval: 15s
|
||||||
|
|
||||||
# Red Panda Approved Prometheus Configuration
|
# Red Panda Approved Prometheus Configuration
|
||||||
|
|||||||
351
dashboards/neo4j.json
Normal file
351
dashboards/neo4j.json
Normal file
@@ -0,0 +1,351 @@
|
|||||||
|
{
|
||||||
|
"title": "Neo4j",
|
||||||
|
"uid": "neo4j",
|
||||||
|
"tags": ["neo4j", "graph"],
|
||||||
|
"timezone": "browser",
|
||||||
|
"schemaVersion": 39,
|
||||||
|
"version": 1,
|
||||||
|
"editable": true,
|
||||||
|
"fiscalYearStartMonth": 0,
|
||||||
|
"weekStart": "",
|
||||||
|
"refresh": "30s",
|
||||||
|
"time": {"from": "now-1h", "to": "now"},
|
||||||
|
"templating": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"name": "loki",
|
||||||
|
"type": "datasource",
|
||||||
|
"query": "loki",
|
||||||
|
"current": {"selected": false, "text": "Loki", "value": "Loki"},
|
||||||
|
"hide": 0,
|
||||||
|
"label": "Loki datasource"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "prom",
|
||||||
|
"type": "datasource",
|
||||||
|
"query": "prometheus",
|
||||||
|
"current": {"selected": false, "text": "Prometheus", "value": "Prometheus"},
|
||||||
|
"hide": 0,
|
||||||
|
"label": "Prometheus datasource"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "instance",
|
||||||
|
"type": "query",
|
||||||
|
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||||
|
"query": "label_values(up{job=\"neo4j\"}, instance)",
|
||||||
|
"refresh": 1,
|
||||||
|
"includeAll": true,
|
||||||
|
"multi": true,
|
||||||
|
"current": {"selected": true, "text": "All", "value": "$__all"},
|
||||||
|
"label": "Instance"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"type": "row",
|
||||||
|
"title": "Overview",
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"type": "stat",
|
||||||
|
"title": "Exporter up",
|
||||||
|
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||||
|
"gridPos": {"h": 4, "w": 6, "x": 0, "y": 1},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"refId": "A",
|
||||||
|
"expr": "up{job=\"neo4j\", instance=~\"$instance\"}",
|
||||||
|
"legendFormat": "{{instance}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "colorMode": "value", "textMode": "value_and_name"},
|
||||||
|
"fieldConfig": {"defaults": {"mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "green", "value": 1}]}}}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"type": "stat",
|
||||||
|
"title": "Nodes",
|
||||||
|
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||||
|
"gridPos": {"h": 4, "w": 6, "x": 6, "y": 1},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"refId": "A",
|
||||||
|
"expr": "neo4j_monitor_ids_nodeIds{instance=~\"$instance\"}",
|
||||||
|
"legendFormat": "{{instance}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "value_and_name"},
|
||||||
|
"fieldConfig": {"defaults": {"unit": "short"}}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 4,
|
||||||
|
"type": "stat",
|
||||||
|
"title": "Relationships",
|
||||||
|
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||||
|
"gridPos": {"h": 4, "w": 6, "x": 12, "y": 1},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"refId": "A",
|
||||||
|
"expr": "neo4j_monitor_ids_relIds{instance=~\"$instance\"}",
|
||||||
|
"legendFormat": "{{instance}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "value_and_name"},
|
||||||
|
"fieldConfig": {"defaults": {"unit": "short"}}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 5,
|
||||||
|
"type": "stat",
|
||||||
|
"title": "Total store size",
|
||||||
|
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||||
|
"gridPos": {"h": 4, "w": 6, "x": 18, "y": 1},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"refId": "A",
|
||||||
|
"expr": "neo4j_monitor_store_totalStoreSize{instance=~\"$instance\"}",
|
||||||
|
"legendFormat": "{{instance}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "value_and_name"},
|
||||||
|
"fieldConfig": {"defaults": {"unit": "bytes"}}
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"id": 10,
|
||||||
|
"type": "row",
|
||||||
|
"title": "Transactions",
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 5}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 11,
|
||||||
|
"type": "timeseries",
|
||||||
|
"title": "Transaction open rate (per second)",
|
||||||
|
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||||
|
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 6},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"refId": "A",
|
||||||
|
"expr": "rate(neo4j_monitor_tx_totalOpenedTx{instance=~\"$instance\"}[5m])",
|
||||||
|
"legendFormat": "{{instance}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"fieldConfig": {"defaults": {"unit": "ops"}},
|
||||||
|
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 12,
|
||||||
|
"type": "timeseries",
|
||||||
|
"title": "Currently open transactions",
|
||||||
|
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||||
|
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 6},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"refId": "A",
|
||||||
|
"expr": "neo4j_monitor_tx_currentOpenedTx{instance=~\"$instance\"}",
|
||||||
|
"legendFormat": "{{instance}} current"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"refId": "B",
|
||||||
|
"expr": "neo4j_monitor_tx_peakTx{instance=~\"$instance\"}",
|
||||||
|
"legendFormat": "{{instance}} peak"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 13,
|
||||||
|
"type": "stat",
|
||||||
|
"title": "Rollback ratio (10m)",
|
||||||
|
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||||
|
"gridPos": {"h": 4, "w": 12, "x": 0, "y": 14},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"refId": "A",
|
||||||
|
"expr": "rate(neo4j_monitor_tx_rolledBackTx{instance=~\"$instance\"}[10m]) / clamp_min(rate(neo4j_monitor_tx_totalOpenedTx{instance=~\"$instance\"}[10m]), 0.0001)",
|
||||||
|
"legendFormat": "{{instance}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "value_and_name"},
|
||||||
|
"fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "orange", "value": 0.05}, {"color": "red", "value": 0.10}]}}}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 14,
|
||||||
|
"type": "stat",
|
||||||
|
"title": "Last tx ID",
|
||||||
|
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||||
|
"gridPos": {"h": 4, "w": 12, "x": 12, "y": 14},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"refId": "A",
|
||||||
|
"expr": "neo4j_monitor_tx_lastTxId{instance=~\"$instance\"}",
|
||||||
|
"legendFormat": "{{instance}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "value_and_name"},
|
||||||
|
"fieldConfig": {"defaults": {"unit": "short"}}
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"id": 20,
|
||||||
|
"type": "row",
|
||||||
|
"title": "Store breakdown",
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 18}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 21,
|
||||||
|
"type": "timeseries",
|
||||||
|
"title": "Store size by component",
|
||||||
|
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||||
|
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 19},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"refId": "A",
|
||||||
|
"expr": "neo4j_monitor_store_nodeStoreSize{instance=~\"$instance\"}",
|
||||||
|
"legendFormat": "{{instance}} nodes"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"refId": "B",
|
||||||
|
"expr": "neo4j_monitor_store_relStoreSize{instance=~\"$instance\"}",
|
||||||
|
"legendFormat": "{{instance}} rels"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"refId": "C",
|
||||||
|
"expr": "neo4j_monitor_store_propStoreSize{instance=~\"$instance\"}",
|
||||||
|
"legendFormat": "{{instance}} props"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"refId": "D",
|
||||||
|
"expr": "neo4j_monitor_store_stringStoreSize{instance=~\"$instance\"}",
|
||||||
|
"legendFormat": "{{instance}} strings"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"refId": "E",
|
||||||
|
"expr": "neo4j_monitor_store_arrayStoreSize{instance=~\"$instance\"}",
|
||||||
|
"legendFormat": "{{instance}} arrays"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"fieldConfig": {"defaults": {"unit": "bytes"}},
|
||||||
|
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 22,
|
||||||
|
"type": "timeseries",
|
||||||
|
"title": "Transaction log size",
|
||||||
|
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||||
|
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 19},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"refId": "A",
|
||||||
|
"expr": "neo4j_monitor_store_logSize{instance=~\"$instance\"}",
|
||||||
|
"legendFormat": "{{instance}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"fieldConfig": {"defaults": {"unit": "bytes"}},
|
||||||
|
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"id": 30,
|
||||||
|
"type": "row",
|
||||||
|
"title": "Exporter JVM (sidecar health)",
|
||||||
|
"collapsed": true,
|
||||||
|
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 27}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 31,
|
||||||
|
"type": "timeseries",
|
||||||
|
"title": "Exporter JVM heap used / max",
|
||||||
|
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||||
|
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 28},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"refId": "A",
|
||||||
|
"expr": "jvm_memory_used_bytes{job=\"neo4j\", area=\"heap\", instance=~\"$instance\"}",
|
||||||
|
"legendFormat": "{{instance}} used"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"refId": "B",
|
||||||
|
"expr": "jvm_memory_max_bytes{job=\"neo4j\", area=\"heap\", instance=~\"$instance\"}",
|
||||||
|
"legendFormat": "{{instance}} max"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"fieldConfig": {"defaults": {"unit": "bytes"}},
|
||||||
|
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 32,
|
||||||
|
"type": "timeseries",
|
||||||
|
"title": "Exporter GC time",
|
||||||
|
"datasource": {"type": "prometheus", "uid": "${prom}"},
|
||||||
|
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 28},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"refId": "A",
|
||||||
|
"expr": "rate(jvm_gc_collection_seconds_sum{job=\"neo4j\", instance=~\"$instance\"}[5m])",
|
||||||
|
"legendFormat": "{{instance}} {{gc}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"fieldConfig": {"defaults": {"unit": "s"}},
|
||||||
|
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"id": 40,
|
||||||
|
"type": "row",
|
||||||
|
"title": "Logs",
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 36}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 41,
|
||||||
|
"type": "timeseries",
|
||||||
|
"title": "Neo4j log rate by host",
|
||||||
|
"datasource": {"type": "loki", "uid": "${loki}"},
|
||||||
|
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 37},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"refId": "A",
|
||||||
|
"expr": "sum by (hostname) (rate({job=\"neo4j\"}[5m]))",
|
||||||
|
"legendFormat": "{{hostname}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 42,
|
||||||
|
"type": "logs",
|
||||||
|
"title": "Neo4j — last 50 lines (errors/warnings first)",
|
||||||
|
"datasource": {"type": "loki", "uid": "${loki}"},
|
||||||
|
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 37},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"refId": "A",
|
||||||
|
"expr": "{job=\"neo4j\"} |~ \"(?i)error|warn|exception\"",
|
||||||
|
"maxLines": 50
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"options": {"showLabels": true, "showTime": true, "wrapLogMessage": true}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 43,
|
||||||
|
"type": "logs",
|
||||||
|
"title": "Neo4j — all logs (live tail)",
|
||||||
|
"datasource": {"type": "loki", "uid": "${loki}"},
|
||||||
|
"gridPos": {"h": 10, "w": 24, "x": 0, "y": 45},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"refId": "A",
|
||||||
|
"expr": "{job=\"neo4j\"}",
|
||||||
|
"maxLines": 100
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"options": {"showLabels": true, "showTime": true, "wrapLogMessage": true}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user