Introduce structured journal relabel rules on puck to tag Pallas-managed
units with {service, project, component} labels matching the Mnemosyne
and Daedalus schema. Add kottos release variable and vault secrets
example entries for the new Pallas FastAgent runtime.
Remove the defunct mnemosyne syslog listener now that Mnemosyne ships
JSON logs via the docker-socket pipeline.
299 lines
8.8 KiB
Django/Jinja
299 lines
8.8 KiB
Django/Jinja
// Puck Alloy Configuration
|
|
// Red Panda Approved 🐼
|
|
// Services: Log collection, Process metrics, Docker/cAdvisor metrics
|
|
|
|
logging {
|
|
level = "{{alloy_log_level}}"
|
|
}
|
|
|
|
// ============================================================================
|
|
// LOG COLLECTION - Loki Forwarding
|
|
// ============================================================================
|
|
|
|
loki.source.file "system_logs" {
|
|
targets = [
|
|
{__path__ = "/var/log/syslog", job = "syslog"},
|
|
{__path__ = "/var/log/auth.log", job = "auth"},
|
|
]
|
|
forward_to = [loki.write.default.receiver]
|
|
}
|
|
|
|
// Journal relabel rules — tag Pallas-managed units (kottos now, mentor /
|
|
// iolaus later) with the same {service, project, component} schema used
|
|
// by Mnemosyne and Daedalus. Rules run top-to-bottom and STOP at the
|
|
// first target_label match per source, so the generic "systemd" fallback
|
|
// stays last. If a new Pallas host/project ever lands here, copy one of
|
|
// the blocks below and adjust SyslogIdentifier + project.
|
|
loki.relabel "journal_puck" {
|
|
forward_to = []
|
|
|
|
// Expose the systemd unit as an auxiliary label for debugging.
|
|
rule {
|
|
source_labels = ["__journal__systemd_unit"]
|
|
target_label = "unit"
|
|
}
|
|
|
|
// Kottos — Pallas FastAgent runtime for the engineering agent project.
|
|
// SyslogIdentifier=kottos is set in ouranos/ansible/kottos/kottos.service.j2.
|
|
rule {
|
|
source_labels = ["__journal_syslog_identifier"]
|
|
regex = "kottos"
|
|
target_label = "service"
|
|
replacement = "pallas"
|
|
}
|
|
rule {
|
|
source_labels = ["__journal_syslog_identifier"]
|
|
regex = "kottos"
|
|
target_label = "project"
|
|
replacement = "kottos"
|
|
}
|
|
|
|
// Alloy itself — useful to separate from the "systemd" bucket when the
|
|
// shipping pipeline misbehaves.
|
|
rule {
|
|
source_labels = ["__journal__systemd_unit"]
|
|
regex = "alloy\\.service"
|
|
target_label = "service"
|
|
replacement = "alloy"
|
|
}
|
|
|
|
// Default fallback — everything else becomes service="systemd". We
|
|
// also set job here for backwards compatibility with existing
|
|
// dashboards that filter on ``job="systemd"``.
|
|
rule {
|
|
source_labels = ["__journal__systemd_unit"]
|
|
regex = ".+"
|
|
target_label = "job"
|
|
replacement = "systemd"
|
|
}
|
|
}
|
|
|
|
loki.source.journal "systemd_logs" {
|
|
forward_to = [loki.write.default.receiver]
|
|
relabel_rules = loki.relabel.journal_puck.rules
|
|
labels = {
|
|
hostname = "{{inventory_hostname}}",
|
|
environment = "{{deployment_environment}}",
|
|
}
|
|
}
|
|
|
|
loki.source.syslog "angelia_logs" {
|
|
listener {
|
|
address = "127.0.0.1:{{angelia_syslog_port}}"
|
|
protocol = "tcp"
|
|
syslog_format = "{{ syslog_format }}"
|
|
labels = {
|
|
job = "angelia",
|
|
hostname = "{{inventory_hostname}}",
|
|
environment = "{{deployment_environment}}",
|
|
}
|
|
}
|
|
forward_to = [loki.write.default.receiver]
|
|
}
|
|
|
|
loki.source.syslog "athena_logs" {
|
|
listener {
|
|
address = "127.0.0.1:{{athena_syslog_port}}"
|
|
protocol = "tcp"
|
|
syslog_format = "{{ syslog_format }}"
|
|
labels = {
|
|
job = "athena",
|
|
hostname = "{{inventory_hostname}}",
|
|
environment = "{{deployment_environment}}",
|
|
}
|
|
}
|
|
forward_to = [loki.write.default.receiver]
|
|
}
|
|
|
|
loki.source.syslog "kairos_logs" {
|
|
listener {
|
|
address = "127.0.0.1:{{kairos_syslog_port}}"
|
|
protocol = "tcp"
|
|
syslog_format = "{{ syslog_format }}"
|
|
labels = {
|
|
job = "kairos",
|
|
hostname = "{{inventory_hostname}}",
|
|
environment = "{{deployment_environment}}",
|
|
}
|
|
}
|
|
forward_to = [loki.write.default.receiver]
|
|
}
|
|
|
|
// Mnemosyne used to ship via syslog on {{mnemosyne_syslog_port}}; it now
|
|
// logs line-delimited JSON to container stdout and is picked up by the
|
|
// docker-socket block below. The host_var is retained as a reserved port
|
|
// number but no listener binds to it — remove the var from the inventory
|
|
// when the rollout is verified.
|
|
|
|
loki.source.syslog "spelunker_logs" {
|
|
listener {
|
|
address = "127.0.0.1:{{spelunker_syslog_port}}"
|
|
protocol = "tcp"
|
|
syslog_format = "{{ syslog_format }}"
|
|
labels = {
|
|
job = "spelunker",
|
|
hostname = "{{inventory_hostname}}",
|
|
environment = "{{deployment_environment}}",
|
|
}
|
|
}
|
|
forward_to = [loki.write.default.receiver]
|
|
}
|
|
|
|
loki.source.syslog "jupyterlab_logs" {
|
|
listener {
|
|
address = "127.0.0.1:{{jupyterlab_syslog_port}}"
|
|
protocol = "tcp"
|
|
syslog_format = "{{ syslog_format }}"
|
|
labels = {
|
|
job = "jupyterlab",
|
|
hostname = "{{inventory_hostname}}",
|
|
environment = "{{deployment_environment}}",
|
|
}
|
|
}
|
|
forward_to = [loki.write.default.receiver]
|
|
}
|
|
|
|
// Daedalus also used to ship via syslog on {{daedalus_syslog_port}}; it
|
|
// already emits structlog JSON to stdout, so the docker-socket block
|
|
// below now handles it. Host_var kept for the same transitional reason
|
|
// as mnemosyne above.
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// Docker socket — any compose project on this host lands in Loki with
|
|
// `service` = compose project (e.g. "mnemosyne", "daedalus", "kairos") and
|
|
// `component` = compose service (e.g. "app", "mcp", "worker", "nginx").
|
|
// This replaces per-service syslog listeners — one block covers every
|
|
// compose project, current and future.
|
|
//
|
|
// Requires: the Alloy process to have read access to /var/run/docker.sock
|
|
// (Ansible role should add the alloy user to the `docker` group). No Docker
|
|
// daemon changes required — we scrape the json-file driver, which is Docker's
|
|
// default and is pinned in each compose project's x-logging anchor.
|
|
// ----------------------------------------------------------------------------
|
|
discovery.docker "containers" {
|
|
host = "unix:///var/run/docker.sock"
|
|
refresh_interval = "30s"
|
|
}
|
|
|
|
discovery.relabel "containers" {
|
|
targets = discovery.docker.containers.targets
|
|
|
|
// Compose project → service label
|
|
rule {
|
|
source_labels = ["__meta_docker_container_label_com_docker_compose_project"]
|
|
target_label = "service"
|
|
}
|
|
// Compose service → component label
|
|
rule {
|
|
source_labels = ["__meta_docker_container_label_com_docker_compose_service"]
|
|
target_label = "component"
|
|
}
|
|
// Container name (for one-off / non-compose containers)
|
|
rule {
|
|
source_labels = ["__meta_docker_container_name"]
|
|
regex = "/(.*)"
|
|
target_label = "container"
|
|
}
|
|
// Fall back to the container name as `service` when compose labels are
|
|
// absent (e.g. a `docker run ...` container outside any compose project)
|
|
rule {
|
|
source_labels = ["service", "container"]
|
|
separator = "@"
|
|
regex = "@(.+)"
|
|
target_label = "service"
|
|
}
|
|
}
|
|
|
|
loki.source.docker "containers" {
|
|
host = "unix:///var/run/docker.sock"
|
|
targets = discovery.relabel.containers.output
|
|
forward_to = [loki.write.default.receiver]
|
|
labels = {
|
|
hostname = "{{inventory_hostname}}",
|
|
environment = "{{deployment_environment}}",
|
|
}
|
|
}
|
|
|
|
loki.write "default" {
|
|
endpoint {
|
|
url = "{{loki_url}}"
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// METRICS COLLECTION - Prometheus Remote Write
|
|
// ============================================================================
|
|
|
|
// Unix/Node metrics - Incus-safe collectors only
|
|
// Disabled collectors that don't work in containers: hwmon, thermal, mdadm, powersupplyclass, nvme
|
|
prometheus.exporter.unix "default" {
|
|
include_exporter_metrics = true
|
|
disable_collectors = [
|
|
"arp",
|
|
"bcache",
|
|
"bonding",
|
|
"btrfs",
|
|
"hwmon",
|
|
"infiniband",
|
|
"ipvs",
|
|
"mdadm",
|
|
"nfs",
|
|
"nfsd",
|
|
"nvme",
|
|
"powersupplyclass",
|
|
"rapl",
|
|
"thermal_zone",
|
|
"zfs",
|
|
]
|
|
}
|
|
|
|
// Process exporter - Track all processes by command name
|
|
// Provides: namedprocess_namegroup_* metrics
|
|
prometheus.exporter.process "default" {
|
|
track_children = true
|
|
track_threads = true
|
|
gather_smaps = false
|
|
recheck_on_scrape = true
|
|
|
|
matcher {
|
|
name = "{% raw %}{{.Comm}}{% endraw %}"
|
|
cmdline = [".+"]
|
|
}
|
|
}
|
|
|
|
// cAdvisor - Docker container metrics
|
|
// Provides: container_* metrics for CPU, memory, network, disk
|
|
prometheus.exporter.cadvisor "default" {
|
|
docker_host = "unix:///var/run/docker.sock"
|
|
storage_duration = "5m"
|
|
docker_only = true
|
|
}
|
|
|
|
// Scrape all local exporters
|
|
prometheus.scrape "local_exporters" {
|
|
targets = concat(
|
|
prometheus.exporter.unix.default.targets,
|
|
prometheus.exporter.process.default.targets,
|
|
prometheus.exporter.cadvisor.default.targets,
|
|
)
|
|
forward_to = [prometheus.relabel.add_instance.receiver]
|
|
scrape_interval = "15s"
|
|
job_name = "puck"
|
|
}
|
|
|
|
// Add instance label for Prometheus compatibility
|
|
prometheus.relabel "add_instance" {
|
|
forward_to = [prometheus.remote_write.default.receiver]
|
|
|
|
rule {
|
|
target_label = "instance"
|
|
replacement = "{{inventory_hostname}}"
|
|
}
|
|
}
|
|
|
|
// Remote write to Prospero Prometheus
|
|
prometheus.remote_write "default" {
|
|
endpoint {
|
|
url = "{{prometheus_remote_write_url}}"
|
|
}
|
|
} |