Skip to content

Commit

Permalink
fix: check mntPath status during fuse abort (#1268)
Browse files Browse the repository at this point in the history
When the mount pod supports fuse pass, check whether the mount point has already been recovered by another normal mount pod; if so, do not abort the fuse connection.

Signed-off-by: Xuhui zhang <[email protected]>
  • Loading branch information
zxh326 authored Feb 12, 2025
1 parent 068fb72 commit cbe86dc
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 5 deletions.
29 changes: 28 additions & 1 deletion pkg/controller/pod_driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"runtime"
"strings"
"sync"
"syscall"
"time"

corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -979,7 +980,33 @@ func (p *PodDriver) checkMountPodStuck(pod *corev1.Pod) {

func (p *PodDriver) doAbortFuse(mountpod *corev1.Pod, devMinor uint32) error {
log := klog.NewKlogr().WithName("abortFuse").WithValues("podName", mountpod.Name)
job := builder.NewFuseAbortJob(mountpod, devMinor)
mntPath, _, err := util.GetMountPathOfPod(*mountpod)
if err != nil {
log.Error(err, "get mount point error")
return err
}
supFusePass := util.SupportFusePass(mountpod.Spec.Containers[0].Image)
if supFusePass {
err = util.DoWithTimeout(context.Background(), defaultCheckoutTimeout, func() error {
finfo, err := os.Stat(mntPath)
if err != nil {
return err
}
if st, ok := finfo.Sys().(*syscall.Stat_t); ok {
if st.Ino == 1 {
return nil
} else {
return fmt.Errorf("mount point is not fuse mount")
}
}
return err
})
if err == nil {
log.Info("mount point is normal, don't need to abort fuse connection")
return nil
}
}
job := builder.NewFuseAbortJob(mountpod, devMinor, mntPath)
if _, err := p.Client.CreateJob(context.Background(), job); err != nil {
log.Error(err, "create fuse abort job error")
return err
Expand Down
41 changes: 37 additions & 4 deletions pkg/juicefs/mount/builder/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,33 @@ func (r *JobBuilder) getDeleteVolumeCmd() string {
return fmt.Sprintf("%s && if [ -d /mnt/jfs/%s ]; then %s rmr /mnt/jfs/%s; fi;", cmd, subpath, jfsPath, subpath)
}

func NewFuseAbortJob(mountpod *corev1.Pod, devMinor uint32) *batchv1.Job {
func NewFuseAbortJob(mountpod *corev1.Pod, devMinor uint32, mntPath string) *batchv1.Job {
jobName := fmt.Sprintf("%s-abort-fuse", GenJobNameByVolumeId(mountpod.Name))
ttlSecond := DefaultJobTTLSecond
privileged := true
supFusePass := util.SupportFusePass(mountpod.Spec.Containers[0].Image)
command := fmt.Sprintf(`set -x
supFusePass=%t
if [ $supFusePass = true ]; then
attempt=1
while [ $attempt -le 5 ]; do
if inode=$(timeout 1 stat -c %%i %s 2>/dev/null) && [ "$inode" = "1" ]; then
echo "fuse mount point is normal, exit 0"
exit 0
fi
sleep 1
attempt=$((attempt+1))
done
fi
if [ $(cat /sys/fs/fuse/connections/%d/waiting) -eq 0 ]; then
echo "fuse connections 'waiting' is zero, skip"
fi
echo "fuse mount point is hung or deadlocked, aborting..."
echo 1 > /sys/fs/fuse/connections/%d/abort
`, supFusePass, mntPath, devMinor, devMinor)

return &batchv1.Job{
ObjectMeta: metav1.ObjectMeta{
Name: jobName,
Expand All @@ -193,9 +216,7 @@ func NewFuseAbortJob(mountpod *corev1.Pod, devMinor uint32) *batchv1.Job {
Command: []string{
"sh",
"-c",
fmt.Sprintf(
"if [ $(cat /sys/fs/fuse/connections/%d/waiting) -gt 0 ]; then echo 1 > /sys/fs/fuse/connections/%d/abort; fi;",
devMinor, devMinor),
command,
},
SecurityContext: &corev1.SecurityContext{
Privileged: &privileged,
Expand All @@ -205,6 +226,10 @@ func NewFuseAbortJob(mountpod *corev1.Pod, devMinor uint32) *batchv1.Job {
Name: "fuse-connections",
MountPath: "/sys/fs/fuse/connections",
},
{
Name: "jfs-dir",
MountPath: "/jfs",
},
},
},
},
Expand All @@ -219,6 +244,14 @@ func NewFuseAbortJob(mountpod *corev1.Pod, devMinor uint32) *batchv1.Job {
},
},
},
{
Name: "jfs-dir",
VolumeSource: corev1.VolumeSource{
HostPath: &corev1.HostPathVolumeSource{
Path: config.MountPointPath,
},
},
},
},
},
},
Expand Down

0 comments on commit cbe86dc

Please sign in to comment.