From 2eb0a02c95af09d38f59310c715314c259bd22fe Mon Sep 17 00:00:00 2001
From: Adar Nimrod <nimrod@shore.co.il>
Date: Fri, 13 Dec 2024 23:08:50 +0200
Subject: [PATCH] Ansible: Add failure notification to Debian servers.

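Add a failure-notify@.service template unit that reports a failed unit
to notify.shore.co.il, and wire it up through OnFailure= in the dvb,
btrfs and maintenance-task units and through drop-ins for a set of
system services.

Rather cool. I've seen this pattern before and had it on my todo list.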
---
 Ansible/roles/debian_server/files/dvb.service |  1 +
 .../files/failure-handler@.service            |  6 +++
 .../files/failure-notify@.service             | 15 +++++++
 Ansible/roles/debian_server/tasks/main.yml    | 39 +++++++++++++++++++
 .../roles/debian_server/tasks/maintenance.yml |  6 +++
 .../templates/btrfs_check.service.j2          |  1 +
 .../templates/btrfs_scrub.service.j2          |  1 +
 .../templates/maintenance-task.service.j2     |  1 +
 8 files changed, 70 insertions(+)
 create mode 100644 Ansible/roles/debian_server/files/failure-handler@.service
 create mode 100644 Ansible/roles/debian_server/files/failure-notify@.service

diff --git a/Ansible/roles/debian_server/files/dvb.service b/Ansible/roles/debian_server/files/dvb.service
index 37617a3..199fe30 100644
--- a/Ansible/roles/debian_server/files/dvb.service
+++ b/Ansible/roles/debian_server/files/dvb.service
@@ -3,6 +3,7 @@
 Description=Docker volume backup (dvb)
 ConditionACPower=true
 After=local-fs.target
+OnFailure=failure-notify@%n.service
 
 [Service]
 Type=exec
diff --git a/Ansible/roles/debian_server/files/failure-handler@.service b/Ansible/roles/debian_server/files/failure-handler@.service
new file mode 100644
index 0000000..76681ee
--- /dev/null
+++ b/Ansible/roles/debian_server/files/failure-handler@.service
@@ -0,0 +1,13 @@
+# Template unit: instantiate as failure-handler@<unit>; %i below expands to
+# the instance name (the part between "@" and ".service").
+# NOTE(review): no task in this patch installs this unit or references it via
+# OnFailure= - confirm it is still needed next to failure-notify@.service.
+[Unit]
+Description=Failure handler for %i
+
+[Service]
+Type=oneshot
+# "%" starts a systemd specifier, so each literal percent sign of the
+# URL-encoded spaces is doubled (%%20). %i is the instance name and %H the
+# host name of the running system.
+ExecStart=curl "https://notify.shore.co.il/send?message=Service%%20%i%%20failed%%20on%%20%H."
diff --git a/Ansible/roles/debian_server/files/failure-notify@.service b/Ansible/roles/debian_server/files/failure-notify@.service
new file mode 100644
index 0000000..3853c3b
--- /dev/null
+++ b/Ansible/roles/debian_server/files/failure-notify@.service
@@ -0,0 +1,24 @@
+# Template unit meant to be started through OnFailure=failure-notify@%n.service;
+# %i is then the name of the unit that failed.
+[Unit]
+Description=Send failure notice for %i
+# After= only orders this unit; Wants= is what pulls network-online.target in.
+Wants=network-online.target
+After=network-online.target
+
+[Service]
+Type=oneshot
+# Literal percent signs are doubled (%%20 is a URL-encoded space) because "%"
+# introduces a systemd specifier; %q expands to the pretty host name.
+# --fail turns HTTP error responses into a non-zero exit status, --max-time
+# bounds the request, and --silent --show-error keeps only errors in the log.
+ExecStart=curl --fail --silent --show-error --max-time 30 https://notify.shore.co.il/send?message=Service%%20%i%%20on%%20%q%%20failed.
+# Sandboxing: the only command run here is an outbound HTTPS request.
+DynamicUser=yes
+PrivateDevices=yes
+ProtectSystem=strict
+ProtectHome=yes
+PrivateTmp=yes
+PrivateIPC=yes
+ProtectProc=invisible
+PrivateUsers=yes
diff --git a/Ansible/roles/debian_server/tasks/main.yml b/Ansible/roles/debian_server/tasks/main.yml
index a3a34e3..19dc387 100644
--- a/Ansible/roles/debian_server/tasks/main.yml
+++ b/Ansible/roles/debian_server/tasks/main.yml
@@ -148,3 +148,42 @@
         - toolbx
   tags:
     - always
+
+- name: Add the failure notify service
+  ansible.builtin.copy:
+    src: failure-notify@.service
+    dest: /etc/systemd/system/
+    owner: root
+    group: root
+    mode: "0644"  # explicit rather than inherited from the source file
+  notify:
+    - Systemd daemon reload
+  tags: [notify]
+
+- name: Create services drop-in directories for failure notifications
+  ansible.builtin.file:
+    state: directory
+    path: "/etc/systemd/system/{{ item }}.d"
+    mode: 0o0755
+  loop: &notify_failure_units  # anchored: the drop-in file task reuses it
+    - containerd.service
+    - cron.service
+    - docker.service
+    - rsyslog.service
+    - ssh.service
+    - systemd-networkd.service
+    - systemd-timesyncd.service
+    - ufw.service
+  tags: [notify]
+
+- name: Add failure notification to services
+  ansible.builtin.copy:
+    dest: "/etc/systemd/system/{{ item }}.d/failure-notify.conf"
+    mode: 0o0644
+    content: |
+      [Unit]
+      OnFailure=failure-notify@%n.service
+  loop: *notify_failure_units  # same unit list as the directory task
+  notify:
+    - Systemd daemon reload
+  tags: [notify]
diff --git a/Ansible/roles/debian_server/tasks/maintenance.yml b/Ansible/roles/debian_server/tasks/maintenance.yml
index ca0e82a..47e722d 100644
--- a/Ansible/roles/debian_server/tasks/maintenance.yml
+++ b/Ansible/roles/debian_server/tasks/maintenance.yml
@@ -60,3 +60,9 @@
     name: '{{ item["name"] }}'
   notify:
     - Systemd daemon reload
+
+- name: Create the directory for service overrides
+  ansible.builtin.file:
+    state: directory
+    path: /etc/systemd/system/service.d  # presumably drop-ins for all services - confirm
+    mode: 0o0755
diff --git a/Ansible/roles/debian_server/templates/btrfs_check.service.j2 b/Ansible/roles/debian_server/templates/btrfs_check.service.j2
index d719989..7dd59b7 100644
--- a/Ansible/roles/debian_server/templates/btrfs_check.service.j2
+++ b/Ansible/roles/debian_server/templates/btrfs_check.service.j2
@@ -3,6 +3,7 @@
 Description=Check btrfs device {{ device }}
 After=local-fs.target
 After=network-online.target
+OnFailure=failure-notify@%n.service
 
 [Service]
 Type=exec
diff --git a/Ansible/roles/debian_server/templates/btrfs_scrub.service.j2 b/Ansible/roles/debian_server/templates/btrfs_scrub.service.j2
index 363628a..4278f52 100644
--- a/Ansible/roles/debian_server/templates/btrfs_scrub.service.j2
+++ b/Ansible/roles/debian_server/templates/btrfs_scrub.service.j2
@@ -3,6 +3,7 @@
 Description=Scrub btrfs device {{ device }}
 ConditionACPower=true
 After=local-fs.target
+OnFailure=failure-notify@%n.service
 
 [Service]
 Type=exec
diff --git a/Ansible/roles/debian_server/templates/maintenance-task.service.j2 b/Ansible/roles/debian_server/templates/maintenance-task.service.j2
index dc2c57c..109ea5e 100644
--- a/Ansible/roles/debian_server/templates/maintenance-task.service.j2
+++ b/Ansible/roles/debian_server/templates/maintenance-task.service.j2
@@ -4,6 +4,7 @@ Description={{ description|default(name) }}
 ConditionACPower=true
 After=local-fs.target
 After=network-online.target
+OnFailure=failure-notify@%n.service
 
 [Service]
 Type=exec
-- 
GitLab