f3s/r-nodes: track NFS auto-repair script and systemd units in conf repo

Pull check-nfs-mount.sh, nfs-mount-monitor.service, and nfs-mount-monitor.timer from r0/r1/r2 (confirmed identical on all three nodes) into f3s/r-nodes/nfs-mount-monitor/. Add f3s/r-nodes/Rexfile with an idempotent nfs_mount_monitor task that pushes the files to all three r-nodes as root and reloads systemd when content changes. Wire the new Rexfile into the repo root Rexfile. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
author: Paul Buetow <paul@buetow.org> 2026-05-10 10:25:20 +0300
committer: Paul Buetow <paul@buetow.org> 2026-05-10 10:25:20 +0300
commit: 425c5fa03b5d2cb44470c70a8d976ca253d662e3 (patch)
tree: fdd58b5a09d838886a9fd9d28efbc205bc3c84f1
parent: 50bf1b1cda71c034c81971ea3eea32966fe66279 (diff)
5 files changed, 216 insertions, 0 deletions
diff --git a/Rexfile b/Rexfile
index 3f5a8eb..52f7e51 100644
--- a/Rexfile
+++ b/Rexfile
@@ -1,2 +1,3 @@
 require for <'*/Rexfile'>;
 require 'f3s/garage/Rexfile';
+require 'f3s/r-nodes/Rexfile';
diff --git a/f3s/r-nodes/Rexfile b/f3s/r-nodes/Rexfile
new file mode 100644
index 0000000..846b539
--- /dev/null
+++ b/f3s/r-nodes/Rexfile
@@ -0,0 +1,87 @@
+# Rex tasks for Rocky Linux r-nodes (r0, r1, r2) — k3s cluster VMs.
+#
+# Run from repository root:
+#   rex -f f3s/r-nodes/Rexfile nfs_mount_monitor
+#
+# All tasks connect as root (r-nodes require root for systemd and
+# /usr/local/bin writes; paul user has no sudo configured on these VMs).
+
+use Rex -feature => [ '1.14', 'exec_autodie' ];
+use Rex::Logger;
+use File::Basename qw(dirname);
+use File::Spec::Functions qw(catfile rel2abs);
+
+my $RNODES_DIR = dirname( rel2abs(__FILE__) );    # absolute directory holding this Rexfile
+
+# All three k3s Rocky Linux VMs; root SSH is configured via authorized_keys.
+group r_nodes => qw(
+  192.168.1.120
+  192.168.1.121
+  192.168.1.122
+);
+
+user 'root';    # log in as root directly ...
+sudo FALSE;     # ... so commands are not wrapped in sudo
+# NOTE(review): user, sudo and parallelism are set at file level; if this file is required from another Rexfile they presumably apply there too — confirm.
+# Deploy in parallel — tasks are idempotent and independent per node.
+parallelism 3;
+
+# Push the NFS mount health monitor and its systemd units to the r_nodes group.
+#
+# Files managed (sources live in nfs-mount-monitor/ beside this Rexfile):
+#   /usr/local/bin/check-nfs-mount.sh       (monitor + auto-repair script)
+#   /etc/systemd/system/nfs-mount-monitor.service
+#   /etc/systemd/system/nfs-mount-monitor.timer
+#
+# on_change callbacks only raise a flag; after all files are processed a raised
+# flag triggers one daemon-reload + timer restart.  Start/enable run every time.
+desc 'Deploy NFS mount monitor script and systemd units to r0/r1/r2';
+task 'nfs_mount_monitor',
+  group => 'r_nodes',
+  sub {
+    my $source_dir = catfile( $RNODES_DIR, 'nfs-mount-monitor' );
+
+    # Remote target, local source name and mode of each file, in deployment order.
+    my @managed = (
+        {
+            target => '/usr/local/bin/check-nfs-mount.sh',
+            source => 'check-nfs-mount.sh',
+            mode   => '755',
+        },
+        {
+            target => '/etc/systemd/system/nfs-mount-monitor.service',
+            source => 'nfs-mount-monitor.service',
+            mode   => '644',
+        },
+        {
+            target => '/etc/systemd/system/nfs-mount-monitor.timer',
+            source => 'nfs-mount-monitor.timer',
+            mode   => '644',
+        },
+    );
+
+    # Raised by any on_change callback so systemd is reloaded at most once.
+    my $needs_reload = 0;
+    for my $item (@managed) {
+        file $item->{target},
+          source    => catfile( $source_dir, $item->{source} ),
+          owner     => 'root',
+          group     => 'root',
+          mode      => $item->{mode},
+          on_change => sub { $needs_reload = 1 };
+    }
+
+    if ($needs_reload) {
+        Rex::Logger::info('Files changed — reloading systemd and restarting timer');
+        run 'systemctl daemon-reload';
+        run 'systemctl restart nfs-mount-monitor.timer';
+    }
+
+    # Independent of the flag: make sure the timer is running and enabled.
+    service 'nfs-mount-monitor.timer', ensure => 'started';
+    run 'systemctl enable nfs-mount-monitor.timer';
+  };
+
+1;
+
+# vim: syntax=perl
diff --git a/f3s/r-nodes/nfs-mount-monitor/check-nfs-mount.sh b/f3s/r-nodes/nfs-mount-monitor/check-nfs-mount.sh
new file mode 100644
index 0000000..7fabed8
--- /dev/null
+++ b/f3s/r-nodes/nfs-mount-monitor/check-nfs-mount.sh
@@ -0,0 +1,95 @@
+#!/bin/bash
+# NFS mount health monitor — runs every 10 seconds via systemd timer
+# (nfs-mount-monitor.timer / nfs-mount-monitor.service)
+#
+# Checks whether /data/nfs/k3svolumes is mounted and responsive.
+# If the mount is stale or missing it attempts a remount, then a
+# fresh umount+mount cycle.  On a successful repair it force-deletes
+# any pods on this node that are Unknown, Pending, not Ready, or stuck in
+# ContainerCreating, so they can be recreated against the now-healthy volume.
+#
+# Deploy via Rex: rex -f f3s/r-nodes/Rexfile nfs_mount_monitor
+
+MOUNT_POINT="/data/nfs/k3svolumes"
+LOCK_FILE="/var/run/nfs-mount-check.lock"
+
+# Single-instance guard: noclobber creates the lock atomically; a lock whose PID is dead is reclaimed.
+if ! ( set -o noclobber; echo "$$" > "$LOCK_FILE" ) 2>/dev/null; then
+    kill -0 "$(cat "$LOCK_FILE" 2>/dev/null)" 2>/dev/null && exit 0
+    echo "$$" > "$LOCK_FILE"
+fi
+trap 'rm -f "$LOCK_FILE"' EXIT
+
+MOUNT_FIXED=0
+
+# Repair $MOUNT_POINT: remount, then mount if absent, then umount + mount.
+# Sets MOUNT_FIXED=1 and returns once a mount succeeds; otherwise exits 1.
+fix_mount () {
+    echo "Attempting to remount NFS mount $MOUNT_POINT"
+    # No -f here: for mount(8), -f means --fake (the real syscall is skipped).
+    if mount -o remount "$MOUNT_POINT" 2>/dev/null; then
+        echo "Remount command issued for $MOUNT_POINT"
+    else
+        echo "Failed to remount NFS mount $MOUNT_POINT"
+    fi
+
+    echo "Checking if $MOUNT_POINT is a mountpoint"
+    if mountpoint "$MOUNT_POINT" >/dev/null 2>&1; then
+        echo "$MOUNT_POINT is a valid mountpoint"
+    else
+        echo "$MOUNT_POINT is not a valid mountpoint, attempting mount"
+        if mount "$MOUNT_POINT"; then
+            echo "Successfully mounted $MOUNT_POINT"
+            MOUNT_FIXED=1
+            return
+        fi
+        echo "Failed to mount $MOUNT_POINT"
+    fi
+    # Still here: force-unmount whatever is present and mount from scratch.
+    echo "Attempting to unmount $MOUNT_POINT"
+    if umount -f "$MOUNT_POINT" 2>/dev/null; then
+        echo "Successfully unmounted $MOUNT_POINT"
+    else
+        echo "Failed to unmount $MOUNT_POINT (it might not be mounted)"
+    fi
+
+    echo "Attempting to mount $MOUNT_POINT"
+    if mount "$MOUNT_POINT"; then
+        echo "NFS mount $MOUNT_POINT mounted successfully"
+        MOUNT_FIXED=1
+        return
+    fi
+    echo "Failed to mount NFS mount $MOUNT_POINT"
+    echo "Failed to fix NFS mount $MOUNT_POINT"
+    exit 1
+}
+
+if ! mountpoint "$MOUNT_POINT" >/dev/null 2>&1; then
+    echo "NFS mount $MOUNT_POINT not found"
+    fix_mount
+fi
+
+if ! timeout 2s stat "$MOUNT_POINT" >/dev/null 2>&1; then
+    echo "NFS mount $MOUNT_POINT appears to be unresponsive"
+    fix_mount
+fi
+
+# After a successful repair, force-delete this node's pods that are Unknown, Pending, not Ready or in ContainerCreating.
+if [ "$MOUNT_FIXED" -eq 1 ]; then
+    echo "Mount was fixed, checking for stuck pods on this node..."
+    NODE=$(hostname)
+    export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
+    kubectl get pods --all-namespaces --field-selector="spec.nodeName=$NODE" \
+      -o json 2>/dev/null | jq -r '
+        .items[] |
+        select(
+          .status.phase == "Unknown" or
+          .status.phase == "Pending" or
+          (.status.conditions // [] | any(.type == "Ready" and .status == "False")) or
+          (.status.containerStatuses // [] | any(.state.waiting.reason == "ContainerCreating"))
+        ) | "\(.metadata.namespace) \(.metadata.name)"' | \
+      while read -r ns pod; do
+        echo "Deleting stuck pod $ns/$pod"
+        kubectl delete pod -n "$ns" "$pod" --grace-period=0 --force 2>&1
+      done
+fi
diff --git a/f3s/r-nodes/nfs-mount-monitor/nfs-mount-monitor.service b/f3s/r-nodes/nfs-mount-monitor/nfs-mount-monitor.service
new file mode 100644
index 0000000..6077e0c
--- /dev/null
+++ b/f3s/r-nodes/nfs-mount-monitor/nfs-mount-monitor.service
@@ -0,0 +1,15 @@
+# systemd one-shot service that runs check-nfs-mount.sh once per invocation.
+# Triggered by nfs-mount-monitor.timer (every 10 seconds).
+# Logs to the journal: journalctl -u nfs-mount-monitor
+#
+# Deploy via Rex: rex -f f3s/r-nodes/Rexfile nfs_mount_monitor
+
+[Unit]
+Description=NFS Mount Health Monitor
+After=network-online.target
+
+[Service]
+Type=oneshot
+ExecStart=/usr/local/bin/check-nfs-mount.sh
+StandardOutput=journal
+StandardError=journal
diff --git a/f3s/r-nodes/nfs-mount-monitor/nfs-mount-monitor.timer b/f3s/r-nodes/nfs-mount-monitor/nfs-mount-monitor.timer
new file mode 100644
index 0000000..31251f2
--- /dev/null
+++ b/f3s/r-nodes/nfs-mount-monitor/nfs-mount-monitor.timer
@@ -0,0 +1,18 @@
+# systemd timer that fires nfs-mount-monitor.service every 10 seconds.
+# AccuracySec=1s keeps the interval tight (default 1m would be too coarse).
+# OnBootSec=30s gives the network and NFS client time to come up before
+# the first check fires.
+#
+# Deploy via Rex: rex -f f3s/r-nodes/Rexfile nfs_mount_monitor
+
+[Unit]
+Description=Run NFS Mount Health Monitor every 10 seconds
+Requires=nfs-mount-monitor.service
+
+[Timer]
+OnBootSec=30s
+OnUnitActiveSec=10s
+AccuracySec=1s
+
+[Install]
+WantedBy=timers.target
author	Paul Buetow <paul@buetow.org>	2026-05-10 10:25:20 +0300
committer	Paul Buetow <paul@buetow.org>	2026-05-10 10:25:20 +0300
commit	425c5fa03b5d2cb44470c70a8d976ca253d662e3 (patch)
tree	fdd58b5a09d838886a9fd9d28efbc205bc3c84f1
parent	50bf1b1cda71c034c81971ea3eea32966fe66279 (diff)