diff options
Diffstat (limited to 'gemfeed/2025-07-14-f3s-kubernetes-with-freebsd-part-6.md')
| -rw-r--r-- | gemfeed/2025-07-14-f3s-kubernetes-with-freebsd-part-6.md | 34 |
1 files changed, 33 insertions, 1 deletions
diff --git a/gemfeed/2025-07-14-f3s-kubernetes-with-freebsd-part-6.md b/gemfeed/2025-07-14-f3s-kubernetes-with-freebsd-part-6.md index 2323aeca..9c428755 100644 --- a/gemfeed/2025-07-14-f3s-kubernetes-with-freebsd-part-6.md +++ b/gemfeed/2025-07-14-f3s-kubernetes-with-freebsd-part-6.md @@ -1,6 +1,6 @@ # f3s: Kubernetes with FreeBSD - Part 6: Storage -> Published at 2025-07-13T16:44:29+03:00, last updated Tue 27 Jan 10:09:08 EET 2026 +> Published at 2025-07-13T16:44:29+03:00, last updated Wed 19 Mar 2026 This is the sixth blog post about the f3s series for self-hosting demands in a home lab. f3s? The "f" stands for FreeBSD, and the "3s" stands for k3s, the Kubernetes distribution used on FreeBSD-based physical machines. @@ -1641,6 +1641,8 @@ fi touch "$LOCK_FILE" trap "rm -f $LOCK_FILE" EXIT +MOUNT_FIXED=0 + fix_mount () { echo "Attempting to remount NFS mount $MOUNT_POINT" if mount -o remount -f "$MOUNT_POINT" 2>/dev/null; then @@ -1656,6 +1658,7 @@ fix_mount () { echo "$MOUNT_POINT is not a valid mountpoint, attempting mount" if mount "$MOUNT_POINT"; then echo "Successfully mounted $MOUNT_POINT" + MOUNT_FIXED=1 return else echo "Failed to mount $MOUNT_POINT" @@ -1672,6 +1675,7 @@ fix_mount () { echo "Attempting to mount $MOUNT_POINT" if mount "$MOUNT_POINT"; then echo "NFS mount $MOUNT_POINT mounted successfully" + MOUNT_FIXED=1 return else echo "Failed to mount NFS mount $MOUNT_POINT" @@ -1690,6 +1694,30 @@ if ! timeout 2s stat "$MOUNT_POINT" >/dev/null 2>&1; then echo "NFS mount $MOUNT_POINT appears to be unresponsive" fix_mount fi + +# After a successful remount, delete pods stuck on this node +if [ "$MOUNT_FIXED" -eq 1 ]; then + echo "Mount was fixed, checking for stuck pods on this node..."
+ NODE=$(hostname) + export KUBECONFIG=/etc/rancher/k3s/k3s.yaml + kubectl get pods --all-namespaces \ + --field-selector="spec.nodeName=$NODE" \ + -o json 2>/dev/null | jq -r ' + .items[] | + select( + .status.phase == "Unknown" or + .status.phase == "Pending" or + (.status.conditions // [] | + any(.type == "Ready" and .status == "False")) or + (.status.containerStatuses // [] | + any(.state.waiting.reason == "ContainerCreating")) + ) | "\(.metadata.namespace) \(.metadata.name)"' | \ + while read ns pod; do + echo "Deleting stuck pod $ns/$pod" + kubectl delete pod -n "$ns" "$pod" \ + --grace-period=0 --force 2>&1 + done +fi EOF [root@r0 ~]# chmod +x /usr/local/bin/check-nfs-mount.sh @@ -1749,6 +1777,10 @@ To enable and start the timer, we run: Note: Stale file handles are inherent to NFS failover because file handles are server-specific. The best approach depends on your application's tolerance for brief disruptions. Of course, all the changes made to `r0` above must also be applied to `r1` and `r2`. +> Updated Wed 19 Mar 2026: Added automatic pod restart after NFS remount + +The script now also tracks whether a mount was fixed via the `MOUNT_FIXED` variable. After a successful remount, it queries kubectl for pods on the local node that are stuck in `Unknown`, `Pending`, or `ContainerCreating` state and force-deletes them. Kubernetes then automatically reschedules these pods, which will now succeed because the NFS mount is healthy again. Without this, pods that hit a stale mount would remain broken until manually deleted, even after the underlying NFS issue was resolved. + ### Complete Failover Test Here's a comprehensive test of the failover behaviour with all optimisations in place: |
