291 lines
8.8 KiB
Django/Jinja
291 lines
8.8 KiB
Django/Jinja
// Puck Alloy Configuration
|
|
// Red Panda Approved 🐼
|
|
// Services: Log collection, Process metrics, Docker/cAdvisor metrics
|
|
|
|
// Alloy's own log verbosity; the level string is filled in when the
// template is rendered.
logging {
  level = "{{alloy_log_level}}"
}
|
|
|
|
// ============================================================================
|
|
// LOG COLLECTION - Loki Forwarding
|
|
// ============================================================================
|
|
|
|
// Tail the host's plain-text log files and forward them to Loki.
//
// Each target carries hostname/environment in addition to its job label so
// these streams can be told apart per host and per environment, matching the
// static labels attached by the journal, syslog and Docker sources in this
// file (loki.write adds no external labels of its own).
loki.source.file "system_logs" {
  targets = [
    {
      __path__    = "/var/log/syslog",
      job         = "syslog",
      hostname    = "{{inventory_hostname}}",
      environment = "{{deployment_environment}}",
    },
    {
      __path__    = "/var/log/auth.log",
      job         = "auth",
      hostname    = "{{inventory_hostname}}",
      environment = "{{deployment_environment}}",
    },
  ]
  forward_to = [loki.write.default.receiver]
}
|
|
|
|
// Journal relabel rules — tag Pallas-managed units (kottos now, mentor /
// iolaus later) following the {service, project, component} schema used by
// Mnemosyne and Daedalus (no rule here sets `component` yet).
//
// Rules are applied top-to-bottom and EVERY rule whose regex matches is
// applied; there is no first-match short-circuit. A fallback therefore has
// to guard itself against overwriting an earlier, more specific value (see
// the service="systemd" rule at the end, which only fires while `service`
// is still empty) and has to stay after the specific rules.
//
// If a new Pallas host/project ever lands here, copy the kottos pair below
// and adjust the SyslogIdentifier regex and the project replacement.
loki.relabel "journal_puck" {
  // Only the exported `rules` are consumed (by loki.source.journal
  // "systemd_logs"), so no entries are forwarded from this component.
  forward_to = []

  // Expose the systemd unit as an auxiliary label for debugging.
  rule {
    source_labels = ["__journal__systemd_unit"]
    target_label  = "unit"
  }

  // Kottos — Pallas FastAgent runtime for the engineering agent project.
  // SyslogIdentifier=kottos is set in ouranos/ansible/kottos/kottos.service.j2.
  rule {
    source_labels = ["__journal_syslog_identifier"]
    regex         = "kottos"
    target_label  = "service"
    replacement   = "pallas"
  }
  rule {
    source_labels = ["__journal_syslog_identifier"]
    regex         = "kottos"
    target_label  = "project"
    replacement   = "kottos"
  }

  // Alloy itself — useful to separate from the "systemd" bucket when the
  // shipping pipeline misbehaves.
  rule {
    source_labels = ["__journal__systemd_unit"]
    regex         = "alloy\\.service"
    target_label  = "service"
    replacement   = "alloy"
  }

  // Backwards compatibility: existing dashboards filter on job="systemd",
  // so every entry that carries a systemd unit gets that job label.
  rule {
    source_labels = ["__journal__systemd_unit"]
    regex         = ".+"
    target_label  = "job"
    replacement   = "systemd"
  }

  // Default fallback — anything no rule above claimed becomes
  // service="systemd". The "^$" guard matches only an empty `service`, so
  // the pallas/alloy values assigned earlier are left untouched.
  rule {
    source_labels = ["service"]
    regex         = "^$"
    target_label  = "service"
    replacement   = "systemd"
  }
}
|
|
|
|
// Read the systemd journal, run the journal_puck relabel rules over each
// entry, and forward the result to Loki with static host/environment labels.
loki.source.journal "systemd_logs" {
  labels = {
    environment = "{{deployment_environment}}",
    hostname    = "{{inventory_hostname}}",
  }
  relabel_rules = loki.relabel.journal_puck.rules
  forward_to    = [loki.write.default.receiver]
}
|
|
|
|
// Angelia — loopback-only TCP syslog listener; entries are labelled
// job="angelia" plus the host/environment pair and forwarded to Loki.
loki.source.syslog "angelia_logs" {
  forward_to = [loki.write.default.receiver]

  listener {
    protocol      = "tcp"
    address       = "127.0.0.1:{{angelia_syslog_port}}"
    syslog_format = "{{ syslog_format }}"

    labels = {
      environment = "{{deployment_environment}}",
      hostname    = "{{inventory_hostname}}",
      job         = "angelia",
    }
  }
}
|
|
|
|
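// Athena used to ship via syslog on the port held in the athena_syslog_port
// host_var; it logs to container stdout and is now picked up by the
// docker-socket block below (service="athena", component=app/mcp/nginx).
// The host_var is retained as a reserved port number but no listener binds
// to it — remove the var from the inventory when the rollout is verified.
// NOTE: the var is named in plain text on purpose. Jinja renders expressions
// even inside Alloy comments, so referencing it as a template expression here
// would make rendering fail as soon as the var is removed.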
|
|
|
|
// Kairos — loopback-only TCP syslog listener; entries are labelled
// job="kairos" plus the host/environment pair and forwarded to Loki.
loki.source.syslog "kairos_logs" {
  forward_to = [loki.write.default.receiver]

  listener {
    protocol      = "tcp"
    address       = "127.0.0.1:{{kairos_syslog_port}}"
    syslog_format = "{{ syslog_format }}"

    labels = {
      environment = "{{deployment_environment}}",
      hostname    = "{{inventory_hostname}}",
      job         = "kairos",
    }
  }
}
|
|
|
|
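// Mnemosyne used to ship via syslog on the port held in the
// mnemosyne_syslog_port host_var; it now logs line-delimited JSON to
// container stdout and is picked up by the docker-socket block below. The
// host_var is retained as a reserved port number but no listener binds to
// it — remove the var from the inventory when the rollout is verified.
// NOTE: the var is named in plain text on purpose. Jinja renders expressions
// even inside Alloy comments, so referencing it as a template expression here
// would make rendering fail as soon as the var is removed.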
|
|
|
|
// Spelunker — loopback-only TCP syslog listener; entries are labelled
// job="spelunker" plus the host/environment pair and forwarded to Loki.
loki.source.syslog "spelunker_logs" {
  forward_to = [loki.write.default.receiver]

  listener {
    protocol      = "tcp"
    address       = "127.0.0.1:{{spelunker_syslog_port}}"
    syslog_format = "{{ syslog_format }}"

    labels = {
      environment = "{{deployment_environment}}",
      hostname    = "{{inventory_hostname}}",
      job         = "spelunker",
    }
  }
}
|
|
|
|
// JupyterLab — loopback-only TCP syslog listener; entries are labelled
// job="jupyterlab" plus the host/environment pair and forwarded to Loki.
loki.source.syslog "jupyterlab_logs" {
  forward_to = [loki.write.default.receiver]

  listener {
    protocol      = "tcp"
    address       = "127.0.0.1:{{jupyterlab_syslog_port}}"
    syslog_format = "{{ syslog_format }}"

    labels = {
      environment = "{{deployment_environment}}",
      hostname    = "{{inventory_hostname}}",
      job         = "jupyterlab",
    }
  }
}
|
|
|
|
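// Daedalus also used to ship via syslog on the port held in the
// daedalus_syslog_port host_var; it already emits structlog JSON to stdout,
// so the docker-socket block below now handles it. The host_var is kept as a
// reserved port number during the transition, but no listener binds to it.
// NOTE: the var is named in plain text on purpose. Jinja renders expressions
// even inside Alloy comments, so referencing it as a template expression here
// would make rendering fail as soon as the var is removed.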
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// Docker socket — any compose project on this host lands in Loki with
|
|
// `service` = compose project (e.g. "mnemosyne", "daedalus", "kairos") and
|
|
// `component` = compose service (e.g. "app", "mcp", "worker", "nginx").
|
|
// This replaces per-service syslog listeners — one block covers every
|
|
// compose project, current and future.
|
|
//
|
|
// Requires: the Alloy process to have read access to /var/run/docker.sock
|
|
// (Ansible role should add the alloy user to the `docker` group). No Docker
|
|
// daemon changes required — we scrape the json-file driver, which is Docker's
|
|
// default and is pinned in each compose project's x-logging anchor.
|
|
// ----------------------------------------------------------------------------
|
|
// Enumerate containers through the local Docker socket, refreshing the
// target list every 30 seconds.
discovery.docker "containers" {
  refresh_interval = "30s"
  host             = "unix:///var/run/docker.sock"
}
|
|
|
|
// Turn Docker discovery metadata into the `service`, `component` and
// `container` labels. Rule order matters: the final rule reads the
// `service` and `container` labels produced by the rules before it.
discovery.relabel "containers" {
  targets = discovery.docker.containers.targets

  // service <- compose project name
  rule {
    target_label  = "service"
    source_labels = ["__meta_docker_container_label_com_docker_compose_project"]
  }

  // component <- compose service name
  rule {
    target_label  = "component"
    source_labels = ["__meta_docker_container_label_com_docker_compose_service"]
  }

  // container <- container name without its leading "/" (used for one-off /
  // non-compose containers)
  rule {
    target_label  = "container"
    source_labels = ["__meta_docker_container_name"]
    regex         = "/(.*)"
  }

  // When compose labels are absent (e.g. a `docker run ...` container
  // outside any compose project) `service` is empty, so the joined value is
  // "@<container>" and the regex matches; the container name then becomes
  // `service`. With a non-empty `service` the joined value does not start
  // with "@" and the rule is a no-op.
  rule {
    target_label  = "service"
    source_labels = ["service", "container"]
    separator     = "@"
    regex         = "@(.+)"
  }
}
|
|
|
|
// Collect logs for the relabelled container targets over the Docker socket
// and forward them to Loki with static host/environment labels.
loki.source.docker "containers" {
  labels = {
    environment = "{{deployment_environment}}",
    hostname    = "{{inventory_hostname}}",
  }
  targets    = discovery.relabel.containers.output
  host       = "unix:///var/run/docker.sock"
  forward_to = [loki.write.default.receiver]
}
|
|
|
|
// Single Loki sink shared by every log source in this file; the push URL is
// filled in when the template is rendered.
loki.write "default" {
  endpoint {
    url = "{{loki_url}}"
  }
}
|
|
|
|
// ============================================================================
|
|
// METRICS COLLECTION - Prometheus Remote Write
|
|
// ============================================================================
|
|
|
|
// Unix/Node metrics, restricted to collectors that are safe under Incus.
// hwmon, thermal_zone, mdadm, powersupplyclass and nvme are switched off
// because they don't work in containers; the remaining entries in the list
// are disabled as well.
prometheus.exporter.unix "default" {
  disable_collectors = [
    "arp", "bcache", "bonding", "btrfs",
    "hwmon", "infiniband", "ipvs", "mdadm",
    "nfs", "nfsd", "nvme", "powersupplyclass",
    "rapl", "thermal_zone", "zfs",
  ]
  include_exporter_metrics = true
}
|
|
|
|
// Process exporter — groups every process by its command name.
// Provides: namedprocess_namegroup_* metrics
prometheus.exporter.process "default" {
  recheck_on_scrape = true
  gather_smaps      = false
  track_threads     = true
  track_children    = true

  // The raw/endraw wrapper stops Jinja from expanding the exporter's own
  // placeholder, so the literal .Comm template reaches Alloy unchanged.
  // The ".+" cmdline pattern accepts any non-empty command line.
  matcher {
    cmdline = [".+"]
    name    = "{% raw %}{{.Comm}}{% endraw %}"
  }
}
|
|
|
|
// cAdvisor — per-container metrics read through the Docker socket.
// Provides: container_* metrics for CPU, memory, network, disk
prometheus.exporter.cadvisor "default" {
  docker_only      = true
  storage_duration = "5m"
  docker_host      = "unix:///var/run/docker.sock"
}
|
|
|
|
// Scrape the unix, process and cAdvisor exporters defined in this file
// every 15s and pass the samples to the add_instance relabel stage.
prometheus.scrape "local_exporters" {
  job_name        = "puck"
  scrape_interval = "15s"

  targets = concat(
    prometheus.exporter.unix.default.targets,
    prometheus.exporter.process.default.targets,
    prometheus.exporter.cadvisor.default.targets,
  )

  forward_to = [prometheus.relabel.add_instance.receiver]
}
|
|
|
|
// Set the `instance` label to the inventory hostname for Prometheus
// compatibility, then hand the samples to remote write. The rule has no
// source_labels or regex, so it is not conditioned on any existing label.
prometheus.relabel "add_instance" {
  rule {
    replacement  = "{{inventory_hostname}}"
    target_label = "instance"
  }

  forward_to = [prometheus.remote_write.default.receiver]
}
|
|
|
|
// Remote write to Prospero Prometheus; the endpoint URL is filled in when
// the template is rendered.
prometheus.remote_write "default" {
  endpoint {
    url = "{{prometheus_remote_write_url}}"
  }
}