summaryrefslogtreecommitdiff
path: root/gemfeed/2025-07-14-f3s-kubernetes-with-freebsd-part-6.gmi.tpl
diff options
context:
space:
mode:
Diffstat (limited to 'gemfeed/2025-07-14-f3s-kubernetes-with-freebsd-part-6.gmi.tpl')
-rw-r--r--gemfeed/2025-07-14-f3s-kubernetes-with-freebsd-part-6.gmi.tpl34
1 files changed, 33 insertions, 1 deletions
diff --git a/gemfeed/2025-07-14-f3s-kubernetes-with-freebsd-part-6.gmi.tpl b/gemfeed/2025-07-14-f3s-kubernetes-with-freebsd-part-6.gmi.tpl
index 96ba9ac6..c15c78c3 100644
--- a/gemfeed/2025-07-14-f3s-kubernetes-with-freebsd-part-6.gmi.tpl
+++ b/gemfeed/2025-07-14-f3s-kubernetes-with-freebsd-part-6.gmi.tpl
@@ -1,6 +1,6 @@
# f3s: Kubernetes with FreeBSD - Part 6: Storage
-> Published at 2025-07-13T16:44:29+03:00, last updated Tue 27 Jan 10:09:08 EET 2026
+> Published at 2025-07-13T16:44:29+03:00, last updated Wed 19 Mar 2026
This is the sixth blog post about the f3s series for self-hosting demands in a home lab. f3s? The "f" stands for FreeBSD, and the "3s" stands for k3s, the Kubernetes distribution used on FreeBSD-based physical machines.
@@ -1581,6 +1581,8 @@ fi
touch "$LOCK_FILE"
trap "rm -f $LOCK_FILE" EXIT
+MOUNT_FIXED=0
+
fix_mount () {
echo "Attempting to remount NFS mount $MOUNT_POINT"
if mount -o remount -f "$MOUNT_POINT" 2>/dev/null; then
@@ -1596,6 +1598,7 @@ fix_mount () {
echo "$MOUNT_POINT is not a valid mountpoint, attempting mount"
if mount "$MOUNT_POINT"; then
echo "Successfully mounted $MOUNT_POINT"
+ MOUNT_FIXED=1
return
else
echo "Failed to mount $MOUNT_POINT"
@@ -1612,6 +1615,7 @@ fix_mount () {
echo "Attempting to mount $MOUNT_POINT"
if mount "$MOUNT_POINT"; then
echo "NFS mount $MOUNT_POINT mounted successfully"
+ MOUNT_FIXED=1
return
else
echo "Failed to mount NFS mount $MOUNT_POINT"
@@ -1630,6 +1634,30 @@ if ! timeout 2s stat "$MOUNT_POINT" >/dev/null 2>&1; then
echo "NFS mount $MOUNT_POINT appears to be unresponsive"
fix_mount
fi
+
+# After a successful remount, delete pods stuck on this node
+if [ "$MOUNT_FIXED" -eq 1 ]; then
+ echo "Mount was fixed, checking for stuck pods on this node..."
+ NODE=$(hostname)
+ export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
+ kubectl get pods --all-namespaces \
+ --field-selector="spec.nodeName=$NODE" \
+ -o json 2>/dev/null | jq -r '
+ .items[] |
+ select(
+ .status.phase == "Unknown" or
+ .status.phase == "Pending" or
+ (.status.conditions // [] |
+ any(.type == "Ready" and .status == "False")) or
+ (.status.containerStatuses // [] |
+ any(.state.waiting.reason == "ContainerCreating"))
+ ) | "\(.metadata.namespace) \(.metadata.name)"' | \
+ while read ns pod; do
+ echo "Deleting stuck pod $ns/$pod"
+ kubectl delete pod -n "$ns" "$pod" \
+ --grace-period=0 --force 2>&1
+ done
+fi
EOF
[root@r0 ~]# chmod +x /usr/local/bin/check-nfs-mount.sh
@@ -1689,6 +1717,10 @@ To enable and start the timer, we run:
Note: Stale file handles are inherent to NFS failover because file handles are server-specific. The best approach depends on your application's tolerance for brief disruptions. Of course, all the changes made to `r0` above must also be applied to `r1` and `r2`.
+> Updated Wed 19 Mar 2026: Added automatic pod restart after NFS remount
+
+The script now also tracks whether a mount was fixed via the `MOUNT_FIXED` variable. After a successful remount, it uses kubectl to query pods on the local node that are stuck in the `Unknown` or `Pending` phase, that report a `Ready` condition of `False`, or that have containers waiting in `ContainerCreating`, and force-deletes them. Kubernetes then automatically reschedules these pods, which will now succeed because the NFS mount is healthy again. Without this, pods that hit a stale mount would remain broken until manually deleted, even after the underlying NFS issue was resolved.
+
### Complete Failover Test
Here's a comprehensive test of the failover behaviour with all optimisations in place: