Files
ouranos/ansible/alloy/puck/config.alloy.j2

291 lines
8.8 KiB
Django/Jinja

// Puck Alloy Configuration
// Red Panda Approved 🐼
// Services: Log collection, Process metrics, Docker/cAdvisor metrics
// Alloy's own logging. The verbosity is not fixed in this file; it is
// filled in from the alloy_log_level template variable at render time.
logging {
    level = "{{alloy_log_level}}"
}
// ============================================================================
// LOG COLLECTION - Loki Forwarding
// ============================================================================
// Tail the flat log files and forward every line to the shared Loki writer.
// Each target carries the same hostname / environment labels that the
// journal, syslog and docker sources in this file attach, so file-based
// streams from different hosts stay distinguishable in Loki. The labels are
// set per target because this component is given no block-level label map
// here. Existing queries on job="syslog" / job="auth" keep matching.
loki.source.file "system_logs" {
    targets = [
        {
            __path__    = "/var/log/syslog",
            job         = "syslog",
            hostname    = "{{inventory_hostname}}",
            environment = "{{deployment_environment}}",
        },
        {
            __path__    = "/var/log/auth.log",
            job         = "auth",
            hostname    = "{{inventory_hostname}}",
            environment = "{{deployment_environment}}",
        },
    ]
    forward_to = [loki.write.default.receiver]
}
// Journal relabel rules — tag Pallas-managed units (kottos now, mentor /
// iolaus later) with the same {service, project, component} schema used
// by Mnemosyne and Daedalus.
//
// Relabel rules do NOT short-circuit: every rule runs, top to bottom,
// against the label set left behind by the previous rule, and a later rule
// that writes the same target_label overwrites the earlier value. The
// service fallback near the end is therefore keyed on "service is still
// empty" and must stay after all the specific service rules. If a new
// Pallas host/project ever lands here, copy one of the kottos blocks below,
// adjust SyslogIdentifier + project, and keep it above the fallback.
loki.relabel "journal_puck" {
    // Only the exported `rules` are consumed (by loki.source.journal); no
    // log entries are routed through this component directly.
    forward_to = []

    // Expose the systemd unit as an auxiliary label for debugging.
    rule {
        source_labels = ["__journal__systemd_unit"]
        target_label  = "unit"
    }

    // Kottos — Pallas FastAgent runtime for the engineering agent project.
    // SyslogIdentifier=kottos is set in ouranos/ansible/kottos/kottos.service.j2.
    rule {
        source_labels = ["__journal_syslog_identifier"]
        regex         = "kottos"
        target_label  = "service"
        replacement   = "pallas"
    }
    rule {
        source_labels = ["__journal_syslog_identifier"]
        regex         = "kottos"
        target_label  = "project"
        replacement   = "kottos"
    }

    // Alloy itself — useful to separate from the "systemd" bucket when the
    // shipping pipeline misbehaves.
    rule {
        source_labels = ["__journal__systemd_unit"]
        regex         = "alloy\\.service"
        target_label  = "service"
        replacement   = "alloy"
    }

    // Default fallback — any entry that has a systemd unit but was not given
    // a service by the rules above becomes service="systemd". The joined
    // value is "<service>;<unit>"; the (fully anchored) regex only matches
    // when it starts with the separator, i.e. when `service` is still empty,
    // so the specific values set above are never overwritten.
    rule {
        source_labels = ["service", "__journal__systemd_unit"]
        separator     = ";"
        regex         = ";.+"
        target_label  = "service"
        replacement   = "systemd"
    }

    // Backwards compatibility: existing dashboards filter on
    // ``job="systemd"``, so every entry with a systemd unit also gets that
    // job label regardless of its service.
    rule {
        source_labels = ["__journal__systemd_unit"]
        regex         = ".+"
        target_label  = "job"
        replacement   = "systemd"
    }
}
// Read the systemd journal, label entries with the rules exported by
// loki.relabel.journal_puck, stamp the host identity on them and hand them
// to the shared Loki writer.
loki.source.journal "systemd_logs" {
    relabel_rules = loki.relabel.journal_puck.rules
    labels        = {
        environment = "{{deployment_environment}}",
        hostname    = "{{inventory_hostname}}",
    }
    forward_to    = [loki.write.default.receiver]
}
// Angelia — syslog listener on the loopback interface. Port and syslog
// format come from template variables; received entries are labelled
// job="angelia" plus the host identity and forwarded to the Loki writer.
loki.source.syslog "angelia_logs" {
    forward_to = [loki.write.default.receiver]

    listener {
        protocol      = "tcp"
        address       = "127.0.0.1:{{angelia_syslog_port}}"
        syslog_format = "{{ syslog_format }}"
        labels        = {
            job         = "angelia",
            environment = "{{deployment_environment}}",
            hostname    = "{{inventory_hostname}}",
        }
    }
}
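// Athena used to ship via syslog on the port held in the `athena_syslog_port`
// host_var; it logs to container stdout and is now picked up by the
// docker-socket block below (service="athena", component=app/mcp/nginx). The
// host_var is retained as a reserved port number but no listener binds to it —
// remove the var from the inventory when the rollout is verified. The variable
// is deliberately named here without a template expression: the template engine
// also evaluates expressions inside comments, so referencing it would make
// rendering fail once the var is removed.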
// Kairos — syslog listener on the loopback interface. Port and syslog
// format come from template variables; received entries are labelled
// job="kairos" plus the host identity and forwarded to the Loki writer.
loki.source.syslog "kairos_logs" {
    forward_to = [loki.write.default.receiver]

    listener {
        protocol      = "tcp"
        address       = "127.0.0.1:{{kairos_syslog_port}}"
        syslog_format = "{{ syslog_format }}"
        labels        = {
            job         = "kairos",
            environment = "{{deployment_environment}}",
            hostname    = "{{inventory_hostname}}",
        }
    }
}
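// Mnemosyne used to ship via syslog on the port held in the
// `mnemosyne_syslog_port` host_var; it now logs line-delimited JSON to
// container stdout and is picked up by the docker-socket block below. The
// host_var is retained as a reserved port number but no listener binds to it —
// remove the var from the inventory when the rollout is verified. The variable
// is deliberately named here without a template expression: the template engine
// also evaluates expressions inside comments, so referencing it would make
// rendering fail once the var is removed.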
// Spelunker — syslog listener on the loopback interface. Port and syslog
// format come from template variables; received entries are labelled
// job="spelunker" plus the host identity and forwarded to the Loki writer.
loki.source.syslog "spelunker_logs" {
    forward_to = [loki.write.default.receiver]

    listener {
        protocol      = "tcp"
        address       = "127.0.0.1:{{spelunker_syslog_port}}"
        syslog_format = "{{ syslog_format }}"
        labels        = {
            job         = "spelunker",
            environment = "{{deployment_environment}}",
            hostname    = "{{inventory_hostname}}",
        }
    }
}
// JupyterLab — syslog listener on the loopback interface. Port and syslog
// format come from template variables; received entries are labelled
// job="jupyterlab" plus the host identity and forwarded to the Loki writer.
loki.source.syslog "jupyterlab_logs" {
    forward_to = [loki.write.default.receiver]

    listener {
        protocol      = "tcp"
        address       = "127.0.0.1:{{jupyterlab_syslog_port}}"
        syslog_format = "{{ syslog_format }}"
        labels        = {
            job         = "jupyterlab",
            environment = "{{deployment_environment}}",
            hostname    = "{{inventory_hostname}}",
        }
    }
}
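// Daedalus also used to ship via syslog on the port held in the
// `daedalus_syslog_port` host_var; it already emits structlog JSON to stdout,
// so the docker-socket block below now handles it. Host_var kept for the same
// transitional reason as mnemosyne above. The variable is deliberately named
// here without a template expression: the template engine also evaluates
// expressions inside comments, so referencing it would make rendering fail
// once the var is removed from the inventory.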
// ----------------------------------------------------------------------------
// Docker socket — any compose project on this host lands in Loki with
// `service` = compose project (e.g. "mnemosyne", "daedalus", "kairos") and
// `component` = compose service (e.g. "app", "mcp", "worker", "nginx").
// This replaces per-service syslog listeners — one block covers every
// compose project, current and future.
//
// Requires: the Alloy process to have read access to /var/run/docker.sock
// (Ansible role should add the alloy user to the `docker` group). No Docker
// daemon changes required — we scrape the json-file driver, which is Docker's
// default and is pinned in each compose project's x-logging anchor.
// ----------------------------------------------------------------------------
// Enumerate containers through the local Docker socket; the target list is
// refreshed every 30 seconds.
discovery.docker "containers" {
    refresh_interval = "30s"
    host             = "unix:///var/run/docker.sock"
}
// Turn Docker discovery metadata into the service / component / container
// labels. Rule order matters: the last rule reads the `service` and
// `container` labels written by the rules before it.
discovery.relabel "containers" {
    targets = discovery.docker.containers.targets

    // service <- compose project name
    rule {
        target_label  = "service"
        source_labels = ["__meta_docker_container_label_com_docker_compose_project"]
    }

    // component <- compose service name
    rule {
        target_label  = "component"
        source_labels = ["__meta_docker_container_label_com_docker_compose_service"]
    }

    // container <- container name with the leading "/" removed (useful for
    // one-off / non-compose containers)
    rule {
        target_label  = "container"
        source_labels = ["__meta_docker_container_name"]
        regex         = "/(.*)"
    }

    // When no compose project label was present (e.g. a `docker run ...`
    // container), `service` is empty, the joined value starts with "@", and
    // the container name is copied into `service`. With a non-empty service
    // the regex does not match and the label is left alone.
    rule {
        target_label  = "service"
        source_labels = ["service", "container"]
        separator     = "@"
        regex         = "@(.+)"
    }
}
// Collect logs for the relabelled container targets through the Docker
// socket, add the host identity labels and forward to the Loki writer.
loki.source.docker "containers" {
    targets    = discovery.relabel.containers.output
    host       = "unix:///var/run/docker.sock"
    labels     = {
        environment = "{{deployment_environment}}",
        hostname    = "{{inventory_hostname}}",
    }
    forward_to = [loki.write.default.receiver]
}
// Single Loki sink shared by every log source in this file; the push URL is
// supplied by the loki_url template variable.
loki.write "default" {
    endpoint {
        url = "{{loki_url}}"
    }
}
// ============================================================================
// METRICS COLLECTION - Prometheus Remote Write
// ============================================================================
// Unix/Node metrics - Incus-safe collectors only
// The disable_collectors list below turns off collectors that don't work in
// containers (hwmon, thermal_zone, mdadm, powersupplyclass, nvme), among others.
// Host-level metrics. The exporter's own metrics are included, and the
// collectors named in disable_collectors are switched off.
prometheus.exporter.unix "default" {
    disable_collectors = [
        "arp", "bcache", "bonding", "btrfs", "hwmon",
        "infiniband", "ipvs", "mdadm", "nfs", "nfsd",
        "nvme", "powersupplyclass", "rapl", "thermal_zone", "zfs",
    ]
    include_exporter_metrics = true
}
// Process exporter - Track all processes by command name
// Provides: namedprocess_namegroup_* metrics
// Per-process metrics. The single matcher accepts any non-empty command line
// and names each group via the exporter's .Comm template field. The raw /
// endraw markers make the template engine emit that placeholder verbatim
// instead of trying to evaluate it.
prometheus.exporter.process "default" {
    recheck_on_scrape = true
    gather_smaps      = false
    track_threads     = true
    track_children    = true

    matcher {
        cmdline = [".+"]
        name    = "{% raw %}{{.Comm}}{% endraw %}"
    }
}
// cAdvisor - Docker container metrics
// Provides: container_* metrics for CPU, memory, network, disk
// Container metrics via the embedded cAdvisor exporter, talking to the local
// Docker socket and restricted to Docker containers (docker_only).
prometheus.exporter.cadvisor "default" {
    docker_only      = true
    docker_host      = "unix:///var/run/docker.sock"
    storage_duration = "5m"
}
// Scrape all local exporters
// One scrape job covering the unix, process and cAdvisor exporters defined in
// this file. Samples are scraped every 15s and passed to the add_instance
// relabel stage before remote write.
prometheus.scrape "local_exporters" {
    job_name        = "puck"
    scrape_interval = "15s"
    targets         = concat(
        prometheus.exporter.unix.default.targets,
        prometheus.exporter.process.default.targets,
        prometheus.exporter.cadvisor.default.targets,
    )
    forward_to      = [prometheus.relabel.add_instance.receiver]
}
// Add instance label for Prometheus compatibility
// Set the `instance` label on every sample to the inventory hostname before
// the samples reach remote write. The rule has no source labels or regex of
// its own, so it applies to every sample passing through.
prometheus.relabel "add_instance" {
    rule {
        replacement  = "{{inventory_hostname}}"
        target_label = "instance"
    }

    forward_to = [prometheus.remote_write.default.receiver]
}
// Remote write to Prospero Prometheus
// Metrics sink: remote-write endpoint whose URL is supplied by the
// prometheus_remote_write_url template variable.
prometheus.remote_write "default" {
    endpoint {
        url = "{{prometheus_remote_write_url}}"
    }
}