diff --git a/roles/nut/README.md b/roles/nut/README.md new file mode 100644 index 0000000..ce0cb49 --- /dev/null +++ b/roles/nut/README.md @@ -0,0 +1,88 @@ +# nut — Network UPS Tools + +Monitors a UPS over USB (or serial/network), notifies via ntfy on power events +and gracefully shuts the host down on low battery. + +## Supported distributions + +- Arch Linux +- Debian/Ubuntu + +## What it does + +- Installs `nut` and configures it in **standalone** mode (single host, no + network slaves). +- Configures the `usbhid-ups` driver against the UPS defined in `nut_ups_name` + (default: EATON Ellipse 1600, vendorid `0463`). +- Binds `upsd` to `127.0.0.1:3493` only — no LAN exposure. +- Runs `upsmon` as master, which: + - calls `SHUTDOWNCMD` (`systemctl poweroff`) on `LOWBATT`, + - dispatches every event to a `NOTIFYCMD` wrapper that POSTs to ntfy with + severity, tags and a host-aware title. + +## Configuration + +Variables — see [defaults/main.yml](defaults/main.yml). + +Required (role asserts at start): + +```yaml +nut_monitor_password: "" # local upsd user used by upsmon + exporter +nut_ntfy_topic: "ups-" +``` + +Optional but commonly tweaked: + +```yaml +nut_ups_name: eaton +nut_ups_description: "EATON Ellipse 1600" +nut_ups_vendorid: "0463" +nut_ntfy_server: https://ntfy.jokester.fr +nut_ntfy_token: "tk_..." # publish token for nut_ntfy_topic +``` + +## Operations + +### Check UPS status + +```bash +upsc {{ nut_ups_name }}@localhost +``` + +### List configured UPSes + +```bash +upsc -l +``` + +### Test the NOTIFYCMD pipeline without unplugging + +```bash +sudo -u nut NOTIFYTYPE=ONBATT /usr/local/bin/ups-notify "Simulated ONBATT for ntfy plumbing test" +``` + +### Simulate a full power loss (DANGEROUS — actually powers off) + +```bash +sudo upsmon -c fsd +``` + +### Logs + +```bash +journalctl -u nut-monitor -u nut-server -u 'nut-driver@*' -f +``` + +## Security + +- `upsd` binds to `127.0.0.1` only. +- `upsd.users` mode `0640` owned by `root:nut`. 
+- No anonymous read access — exporter and upsmon both authenticate as + `nut_monitor_user`. +- udev rules shipped by the `nut` package grant USB device access to the `nut` + group only. + +## Companion role + +See [`nut_exporter`](../nut_exporter/README.md) to expose Prometheus metrics +based on the same upsd instance. diff --git a/roles/nut/defaults/main.yml b/roles/nut/defaults/main.yml new file mode 100644 index 0000000..e6fdda5 --- /dev/null +++ b/roles/nut/defaults/main.yml @@ -0,0 +1,63 @@ +--- +# NUT (Network UPS Tools) configuration +# See: https://networkupstools.org/docs/man/upsmon.conf.html + +# UPS definition +# -------------- +# Logical name of the UPS as referenced everywhere (ups.conf section, upsmon +# MONITOR line, nut_exporter ?ups= query parameter). +nut_ups_name: eaton + +# Human-readable description (shown in upsc output). +nut_ups_description: "EATON Ellipse 1600" + +# Driver to use. usbhid-ups covers all USB HID-compliant UPSes (EATON, APC, +# CyberPower, etc.). See: https://networkupstools.org/stable-hcl.html +nut_ups_driver: usbhid-ups + +# USB vendorid filter (EATON = 0463). Helps disambiguate if multiple USB HID +# devices are present. Leave empty to auto-detect. +nut_ups_vendorid: "0463" + +# Driver polling interval in seconds. Some Eaton/MGE units lock up if polled too +# aggressively (the default is 2). 10-15s gives the microcontroller breathing room. +nut_ups_pollinterval: 15 +# Number of connection attempts before the driver gives up. If the USB chip +# freezes, the driver will try to reopen the port up to this many times. +nut_ups_maxretry: 3 + +# upsd server +# ----------- +# Bind addresses for upsd. Keep localhost-only unless you want to monitor from +# other hosts (in which case add the wireguard IP and adjust firewall). +nut_upsd_listen: + - { addr: "127.0.0.1", port: 3493 } + +# Local monitor user used by upsmon and nut_exporter. Password must be set. 
+nut_monitor_user: monitor +# nut_monitor_password: "" # Intentionally undefined - role will fail if not set + +# upsmon (shutdown manager + NOTIFYCMD dispatcher) +# ------------------------------------------------ +# Number of power supplies that must be receiving power to keep this host +# running (MINSUPPLIES). Also used as the MONITOR power value: 0 = never shut down. +nut_upsmon_minsupplies: 1 +nut_upsmon_pollfreq: 5 # seconds between polls when on line power +nut_upsmon_pollfreqalert: 5 # seconds between polls when on battery +nut_upsmon_deadtime: 15 # seconds before declaring a UPS dead +nut_upsmon_hostsync: 15 # seconds to wait for slaves before shutting down +nut_upsmon_finaldelay: 5 # seconds between SHUTDOWN notification and poweroff + +# Command run on the host once the master decides it is time to power off. +# systemctl poweroff is sufficient for a single-host standalone setup. +nut_upsmon_shutdown_cmd: "/usr/bin/systemctl poweroff" + +# ntfy notifications +# ------------------ +# Topic to publish UPS events to. Should be a dedicated topic for power events. +# nut_ntfy_topic: "" # Intentionally undefined - role will fail if not set +nut_ntfy_server: https://ntfy.jokester.fr +# nut_ntfy_token: "" # Intentionally undefined - unauthenticated if not set + +# Path of the deployed NOTIFYCMD wrapper. 
+nut_notify_script_path: /usr/local/bin/ups-notify diff --git a/roles/nut/handlers/main.yml b/roles/nut/handlers/main.yml new file mode 100644 index 0000000..7944b2d --- /dev/null +++ b/roles/nut/handlers/main.yml @@ -0,0 +1,15 @@ +--- +- name: Restart NUT driver enumerator + ansible.builtin.systemd: + name: nut-driver-enumerator.service + state: restarted + +- name: Restart NUT server + ansible.builtin.systemd: + name: nut-server.service + state: restarted + +- name: Restart NUT monitor + ansible.builtin.systemd: + name: nut-monitor.service + state: restarted diff --git a/roles/nut/meta/main.yml b/roles/nut/meta/main.yml new file mode 100644 index 0000000..23d65c7 --- /dev/null +++ b/roles/nut/meta/main.yml @@ -0,0 +1,2 @@ +--- +dependencies: [] diff --git a/roles/nut/tasks/main.yml b/roles/nut/tasks/main.yml new file mode 100644 index 0000000..189c535 --- /dev/null +++ b/roles/nut/tasks/main.yml @@ -0,0 +1,97 @@ +--- +- name: Validate required configuration + ansible.builtin.assert: + that: + - nut_monitor_password is defined + - nut_monitor_password | length >= 12 + - nut_ntfy_topic is defined + - nut_ntfy_topic | length > 0 + fail_msg: | + nut_monitor_password (>=12 chars) and nut_ntfy_topic are required. + See roles/nut/defaults/main.yml for configuration. 
+ +- name: Load OS-specific variables + ansible.builtin.include_vars: "{{ item }}" + with_first_found: + - "{{ ansible_facts['os_family'] | lower }}.yml"  # os_family is capitalised (Archlinux/Debian); vars files are lowercase + - debian.yml + +- name: Install NUT + ansible.builtin.package: + name: "{{ nut_package }}" + state: present + +- name: Ensure NUT config directory exists + ansible.builtin.file: + path: "{{ nut_config_dir }}" + state: directory + owner: root + group: "{{ nut_group }}" + mode: "0750" + +- name: Set NUT to standalone mode + ansible.builtin.copy: + dest: "{{ nut_config_dir }}/nut.conf" + content: | + # Managed by Ansible - DO NOT EDIT MANUALLY + MODE=standalone + owner: root + group: "{{ nut_group }}" + mode: "0640" + notify: + - Restart NUT driver enumerator + - Restart NUT server + - Restart NUT monitor + +- name: Deploy ups.conf + ansible.builtin.template: + src: ups.conf.j2 + dest: "{{ nut_config_dir }}/ups.conf" + owner: root + group: "{{ nut_group }}" + mode: "0640" + notify: + - Restart NUT driver enumerator + - Restart NUT server + +- name: Deploy upsd.conf + ansible.builtin.template: + src: upsd.conf.j2 + dest: "{{ nut_config_dir }}/upsd.conf" + owner: root + group: "{{ nut_group }}" + mode: "0640" + notify: Restart NUT server + +- name: Deploy upsd.users + ansible.builtin.template: + src: upsd.users.j2 + dest: "{{ nut_config_dir }}/upsd.users" + owner: root + group: "{{ nut_group }}" + mode: "0640" + notify: Restart NUT server + +- name: Deploy ntfy NOTIFYCMD script + ansible.builtin.template: + src: ups-notify.sh.j2 + dest: "{{ nut_notify_script_path }}" + owner: root + group: "{{ nut_group }}"  # NOTE(review): assumes upsmon runs NOTIFYCMD as a member of this group - confirm + mode: "0750"  # script embeds nut_ntfy_token; must not be world-readable + +- name: Deploy upsmon.conf + ansible.builtin.template: + src: upsmon.conf.j2 + dest: "{{ nut_config_dir }}/upsmon.conf" + owner: root + group: "{{ nut_group }}" + mode: "0640" + notify: Restart NUT monitor + +- name: Enable and start NUT services + ansible.builtin.systemd: + name: "{{ item }}" + enabled: true + state: started + loop: "{{ nut_services }}" diff --git a/roles/nut/templates/ups-notify.sh.j2 
b/roles/nut/templates/ups-notify.sh.j2 new file mode 100644 index 0000000..23c7da6 --- /dev/null +++ b/roles/nut/templates/ups-notify.sh.j2 @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +# Managed by Ansible - DO NOT EDIT MANUALLY +# +# Wrapper invoked by upsmon as NOTIFYCMD. +# upsmon passes the rendered NOTIFYMSG as $1 and sets NOTIFYTYPE in the env. +# See: https://networkupstools.org/docs/man/upsmon.conf.html +set -euo pipefail + +NTFY_SERVER="{{ nut_ntfy_server }}" +NTFY_TOPIC="{{ nut_ntfy_topic }}" +{% if nut_ntfy_token is defined %} +NTFY_TOKEN="{{ nut_ntfy_token }}" +{% else %} +NTFY_TOKEN="" +{% endif %} + +MESSAGE="${1:-UPS event}" +EVENT="${NOTIFYTYPE:-UNKNOWN}" +HOST="$(uname -n)" + +case "$EVENT" in + ONBATT) + TITLE="UPS on battery — $HOST" + PRIORITY="urgent" + TAGS="warning,electric_plug" + ;; + LOWBATT) + TITLE="UPS low battery — $HOST" + PRIORITY="urgent" + TAGS="rotating_light,battery" + ;; + FSD|SHUTDOWN) + TITLE="UPS forced shutdown — $HOST" + PRIORITY="max" + TAGS="skull" + ;; + ONLINE) + TITLE="UPS back on line power — $HOST" + PRIORITY="default" + TAGS="white_check_mark,zap" + ;; + COMMBAD|NOCOMM) + TITLE="UPS communication lost — $HOST" + PRIORITY="high" + TAGS="warning,satellite" + ;; + COMMOK) + TITLE="UPS communication restored — $HOST" + PRIORITY="default" + TAGS="white_check_mark" + ;; + REPLBATT) + TITLE="UPS battery needs replacement — $HOST" + PRIORITY="high" + TAGS="battery,wrench" + ;; + *) + TITLE="UPS event ($EVENT) — $HOST" + PRIORITY="default" + TAGS="information_source" + ;; +esac + +auth_args=() +if [[ -n "$NTFY_TOKEN" ]]; then + auth_args=(-H "Authorization: Bearer $NTFY_TOKEN") +fi + +# --max-time is important: upsmon will hang on poweroff if curl blocks. 
+curl -fsS --max-time 10 \ + "${auth_args[@]}" \ + -H "Title: $TITLE" \ + -H "Priority: $PRIORITY" \ + -H "Tags: $TAGS" \ + -d "$MESSAGE" \ + "${NTFY_SERVER%/}/${NTFY_TOPIC}" >/dev/null || \ + logger -t ups-notify "Failed to publish ntfy notification for $EVENT" diff --git a/roles/nut/templates/ups.conf.j2 b/roles/nut/templates/ups.conf.j2 new file mode 100644 index 0000000..0635943 --- /dev/null +++ b/roles/nut/templates/ups.conf.j2 @@ -0,0 +1,12 @@ +# Managed by Ansible - DO NOT EDIT MANUALLY +# See: https://networkupstools.org/docs/man/ups.conf.html + +[{{ nut_ups_name }}] + driver = {{ nut_ups_driver }} + port = auto + desc = "{{ nut_ups_description }}" + pollinterval = {{ nut_ups_pollinterval }} + maxretry = {{ nut_ups_maxretry }} +{% if nut_ups_vendorid %} + vendorid = {{ nut_ups_vendorid }} +{% endif %} diff --git a/roles/nut/templates/upsd.conf.j2 b/roles/nut/templates/upsd.conf.j2 new file mode 100644 index 0000000..f96a11a --- /dev/null +++ b/roles/nut/templates/upsd.conf.j2 @@ -0,0 +1,6 @@ +# Managed by Ansible - DO NOT EDIT MANUALLY +# See: https://networkupstools.org/docs/man/upsd.conf.html + +{% for listen in nut_upsd_listen %} +LISTEN {{ listen.addr }} {{ listen.port }} +{% endfor %} diff --git a/roles/nut/templates/upsd.users.j2 b/roles/nut/templates/upsd.users.j2 new file mode 100644 index 0000000..9df74a8 --- /dev/null +++ b/roles/nut/templates/upsd.users.j2 @@ -0,0 +1,6 @@ +# Managed by Ansible - DO NOT EDIT MANUALLY +# See: https://networkupstools.org/docs/man/upsd.users.html + +[{{ nut_monitor_user }}] + password = {{ nut_monitor_password }} + upsmon master diff --git a/roles/nut/templates/upsmon.conf.j2 b/roles/nut/templates/upsmon.conf.j2 new file mode 100644 index 0000000..92d72ad --- /dev/null +++ b/roles/nut/templates/upsmon.conf.j2 @@ -0,0 +1,37 @@ +# Managed by Ansible - DO NOT EDIT MANUALLY +# See: https://networkupstools.org/docs/man/upsmon.conf.html + +MONITOR {{ nut_ups_name }}@localhost {{ nut_upsmon_minsupplies }} {{ 
nut_monitor_user }} {{ nut_monitor_password }} master + +MINSUPPLIES {{ nut_upsmon_minsupplies }} +SHUTDOWNCMD "{{ nut_upsmon_shutdown_cmd }}" +NOTIFYCMD "{{ nut_notify_script_path }}" + +POLLFREQ {{ nut_upsmon_pollfreq }} +POLLFREQALERT {{ nut_upsmon_pollfreqalert }} +DEADTIME {{ nut_upsmon_deadtime }} +HOSTSYNC {{ nut_upsmon_hostsync }} +FINALDELAY {{ nut_upsmon_finaldelay }} + +# Default notification messages (overridable per event). +NOTIFYMSG ONLINE "UPS %s is back on line power" +NOTIFYMSG ONBATT "UPS %s is on battery (mains lost)" +NOTIFYMSG LOWBATT "UPS %s battery is low — shutdown imminent" +NOTIFYMSG FSD "UPS %s forced shutdown in progress" +NOTIFYMSG COMMOK "Communications with UPS %s restored" +NOTIFYMSG COMMBAD "Communications with UPS %s lost" +NOTIFYMSG SHUTDOWN "System is shutting down due to UPS %s" +NOTIFYMSG REPLBATT "UPS %s battery needs replacement" +NOTIFYMSG NOCOMM "UPS %s is unavailable" + +# Route events through SYSLOG and the NOTIFYCMD wrapper. NUT also supports +# WALL (broadcast to logged-in users) but it's noisy and not useful here. 
+NOTIFYFLAG ONLINE SYSLOG+EXEC +NOTIFYFLAG ONBATT SYSLOG+EXEC +NOTIFYFLAG LOWBATT SYSLOG+EXEC +NOTIFYFLAG FSD SYSLOG+EXEC +NOTIFYFLAG COMMOK SYSLOG+EXEC +NOTIFYFLAG COMMBAD SYSLOG+EXEC +NOTIFYFLAG SHUTDOWN SYSLOG+EXEC +NOTIFYFLAG REPLBATT SYSLOG+EXEC +NOTIFYFLAG NOCOMM SYSLOG+EXEC diff --git a/roles/nut/vars/archlinux.yml b/roles/nut/vars/archlinux.yml new file mode 100644 index 0000000..e79960a --- /dev/null +++ b/roles/nut/vars/archlinux.yml @@ -0,0 +1,9 @@ +--- +nut_package: nut +nut_config_dir: /etc/nut +nut_user: nut +nut_group: nut +nut_services: + - nut-driver-enumerator.service + - nut-server.service + - nut-monitor.service diff --git a/roles/nut/vars/debian.yml b/roles/nut/vars/debian.yml new file mode 100644 index 0000000..e79960a --- /dev/null +++ b/roles/nut/vars/debian.yml @@ -0,0 +1,9 @@ +--- +nut_package: nut +nut_config_dir: /etc/nut +nut_user: nut +nut_group: nut +nut_services: + - nut-driver-enumerator.service + - nut-server.service + - nut-monitor.service diff --git a/roles/nut_exporter/README.md b/roles/nut_exporter/README.md new file mode 100644 index 0000000..8ed5fb6 --- /dev/null +++ b/roles/nut_exporter/README.md @@ -0,0 +1,60 @@ +# nut_exporter — Prometheus exporter for NUT + +Scrapes a local `upsd` and exposes UPS metrics for Prometheus. + +## Supported distributions + +- Arch Linux (AUR package `prometheus-nut-exporter`, installed via `paru`) + +Debian/Ubuntu is not packaged upstream — add it on demand. + +## Configuration + +See [defaults/main.yml](defaults/main.yml). 
+ +Required: + +```yaml +nut_exporter_nut_password: "" +``` + +Optional: + +```yaml +nut_exporter_listen_address: "127.0.0.1:9199" +nut_exporter_nut_server: "127.0.0.1:3493" +nut_exporter_nut_user: monitor +``` + +## Pairing with Prometheus + +Typical scrape config (target uses the multi-target pattern: the exporter +queries a remote upsd specified in the URL parameters): + +```yaml +prometheus_scrape_configs: + - job_name: 'nut' + metrics_path: /nut + static_configs: + - targets: ['eaton@localhost'] # ups@host syntax + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - target_label: __address__ + replacement: 127.0.0.1:9199 +``` + +## Operations + +```bash +systemctl status prometheus-nut-exporter +curl -s 'http://127.0.0.1:9199/nut?target=localhost&ups=eaton' | head +journalctl -u prometheus-nut-exporter -f +``` + +## Dependencies + +Requires the [`nut`](../nut/README.md) role (or any other running upsd) on the +same host. diff --git a/roles/nut_exporter/defaults/main.yml b/roles/nut_exporter/defaults/main.yml new file mode 100644 index 0000000..5a2b4ac --- /dev/null +++ b/roles/nut_exporter/defaults/main.yml @@ -0,0 +1,13 @@ +--- +# Prometheus NUT exporter configuration + +# Address the exporter listens on. +nut_exporter_listen_address: "127.0.0.1:9199" + +# upsd server to connect to (kept local — exporter sits next to upsd). +nut_exporter_nut_server: "127.0.0.1:3493" + +# Credentials used to log into upsd. These should match the upsd user defined +# by the nut role (nut_monitor_user / nut_monitor_password). 
+nut_exporter_nut_user: "{{ nut_monitor_user | default('monitor') }}" +# nut_exporter_nut_password: "" # Intentionally undefined - role will fail if not set (nothing is inherited) diff --git a/roles/nut_exporter/handlers/main.yml b/roles/nut_exporter/handlers/main.yml new file mode 100644 index 0000000..e577c03 --- /dev/null +++ b/roles/nut_exporter/handlers/main.yml @@ -0,0 +1,9 @@ +--- +- name: Reload systemd + ansible.builtin.systemd: + daemon_reload: true + +- name: Restart nut_exporter + ansible.builtin.systemd: + name: "{{ nut_exporter_service }}" + state: restarted diff --git a/roles/nut_exporter/meta/main.yml b/roles/nut_exporter/meta/main.yml new file mode 100644 index 0000000..23d65c7 --- /dev/null +++ b/roles/nut_exporter/meta/main.yml @@ -0,0 +1,2 @@ +--- +dependencies: [] diff --git a/roles/nut_exporter/tasks/main.yml b/roles/nut_exporter/tasks/main.yml new file mode 100644 index 0000000..3ce4a18 --- /dev/null +++ b/roles/nut_exporter/tasks/main.yml @@ -0,0 +1,46 @@ +--- +- name: Validate required configuration + ansible.builtin.assert: + that: + - nut_exporter_nut_password is defined + - nut_exporter_nut_password | length >= 12 + fail_msg: | + nut_exporter_nut_password (>=12 chars) is required. + Usually set to the same value as nut_monitor_password. 
+ +- name: Load OS-specific variables + ansible.builtin.include_vars: "{{ item }}" + with_first_found: + - "{{ ansible_facts['os_family'] | lower }}.yml"  # os_family is 'Archlinux'; the shipped vars file is archlinux.yml + +- name: Install prometheus-nut-exporter (AUR via paru) + ansible.builtin.command: "paru -S --noconfirm --needed {{ nut_exporter_package }}" + register: nut_exporter_install + changed_when: "'there is nothing to do' not in nut_exporter_install.stdout | lower" + when: ansible_facts['os_family'] == 'Archlinux' + +- name: Ensure systemd override directory exists + ansible.builtin.file: + path: "{{ nut_exporter_override_dir }}" + state: directory + owner: root + group: root + mode: "0755" + +- name: Deploy systemd override (listen address + upsd credentials) + ansible.builtin.template: + src: override.conf.j2 + dest: "{{ nut_exporter_override_dir }}/override.conf" + owner: root + group: root + mode: "0640" + notify: + - Reload systemd + - Restart nut_exporter + +- name: Enable and start nut_exporter + ansible.builtin.systemd: + name: "{{ nut_exporter_service }}" + enabled: true + state: started + daemon_reload: true diff --git a/roles/nut_exporter/templates/override.conf.j2 b/roles/nut_exporter/templates/override.conf.j2 new file mode 100644 index 0000000..abca590 --- /dev/null +++ b/roles/nut_exporter/templates/override.conf.j2 @@ -0,0 +1,10 @@ +# Managed by Ansible - DO NOT EDIT MANUALLY +# Override for prometheus-nut-exporter to inject listen address and upsd +# credentials. The exporter reads NUT_EXPORTER_* env vars at startup. 
+ +[Service] +Environment="HTTP_LISTEN_ADDRESS={{ nut_exporter_listen_address }}" +Environment="NUT_EXPORTER_SERVER={{ nut_exporter_nut_server.split(':')[0] }}" +Environment="NUT_EXPORTER_PORT={{ nut_exporter_nut_server.split(':')[1] }}" +Environment="NUT_EXPORTER_USERNAME={{ nut_exporter_nut_user }}" +Environment="NUT_EXPORTER_PASSWORD={{ nut_exporter_nut_password }}" diff --git a/roles/nut_exporter/vars/archlinux.yml b/roles/nut_exporter/vars/archlinux.yml new file mode 100644 index 0000000..0b66674 --- /dev/null +++ b/roles/nut_exporter/vars/archlinux.yml @@ -0,0 +1,6 @@ +--- +nut_exporter_package: prometheus-nut-exporter +nut_exporter_service: prometheus-nut-exporter.service +nut_exporter_user: nut-exporter +nut_exporter_group: nut-exporter +nut_exporter_override_dir: /etc/systemd/system/prometheus-nut-exporter.service.d