From 7cbc3955ef3b5bda9336654f1b10a803202e7ed0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 15 May 2017 14:42:07 -0500 Subject: mnt: In umount propagation reparent in a separate pass commit 570487d3faf2a1d8a220e6ee10f472163123d7da upstream. It was observed that in some pathlogical cases that the current code does not unmount everything it should. After investigation it was determined that the issue is that mnt_change_mntpoint can can change which mounts are available to be unmounted during mount propagation which is wrong. The trivial reproducer is: $ cat ./pathological.sh mount -t tmpfs test-base /mnt cd /mnt mkdir 1 2 1/1 mount --bind 1 1 mount --make-shared 1 mount --bind 1 2 mount --bind 1/1 1/1 mount --bind 1/1 1/1 echo grep test-base /proc/self/mountinfo umount 1/1 echo grep test-base /proc/self/mountinfo $ unshare -Urm ./pathological.sh The expected output looks like: 46 31 0:25 / /mnt rw,relatime - tmpfs test-base rw,uid=1000,gid=1000 47 46 0:25 /1 /mnt/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 48 46 0:25 /1 /mnt/2 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 49 54 0:25 /1/1 /mnt/1/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 50 53 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 51 49 0:25 /1/1 /mnt/1/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 54 47 0:25 /1/1 /mnt/1/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 53 48 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 52 50 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 46 31 0:25 / /mnt rw,relatime - tmpfs test-base rw,uid=1000,gid=1000 47 46 0:25 /1 /mnt/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 48 46 0:25 /1 /mnt/2 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 The output without the fix looks like: 46 31 0:25 / /mnt rw,relatime - tmpfs test-base rw,uid=1000,gid=1000 47 46 0:25 /1 /mnt/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 48 46 0:25 /1 /mnt/2 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 49 54 0:25 /1/1 /mnt/1/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 50 53 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 51 49 0:25 /1/1 /mnt/1/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 54 47 0:25 /1/1 /mnt/1/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 53 48 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 52 50 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 46 31 0:25 / /mnt rw,relatime - tmpfs test-base rw,uid=1000,gid=1000 47 46 0:25 /1 /mnt/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 48 46 0:25 /1 /mnt/2 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 52 48 0:25 /1/1 /mnt/2/1 rw,relatime shared:1 - tmpfs test-base rw,uid=1000,gid=1000 That last mount in the output was in the propgation tree to be unmounted but was missed because the mnt_change_mountpoint changed it's parent before the walk through the mount propagation tree observed it. Fixes: 1064f874abc0 ("mnt: Tuck mounts under others instead of creating shadow/side mounts.") Acked-by: Andrei Vagin Reviewed-by: Ram Pai Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index f26d18d69712..4c7174d2041e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -237,6 +237,7 @@ static struct mount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); INIT_HLIST_NODE(&mnt->mnt_mp_list); + INIT_LIST_HEAD(&mnt->mnt_reparent); #ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); #endif -- cgit v1.2.3 From fdb8f10499924d96b8eb60cc1354b278f6fcf0e9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 24 Oct 2016 16:16:13 -0500 Subject: mnt: In propgate_umount handle visiting mounts in any order commit 99b19d16471e9c3faa85cad38abc9cbbe04c6d55 upstream. While investigating some poor umount performance I realized that in the case of overlapping mount trees where some of the mounts are locked the code has been failing to unmount all of the mounts it should have been unmounting. This failure to unmount all of the necessary mounts can be reproduced with: $ cat locked_mounts_test.sh mount -t tmpfs test-base /mnt mount --make-shared /mnt mkdir -p /mnt/b mount -t tmpfs test1 /mnt/b mount --make-shared /mnt/b mkdir -p /mnt/b/10 mount -t tmpfs test2 /mnt/b/10 mount --make-shared /mnt/b/10 mkdir -p /mnt/b/10/20 mount --rbind /mnt/b /mnt/b/10/20 unshare -Urm --propagation unchaged /bin/sh -c 'sleep 5; if [ $(grep test /proc/self/mountinfo | wc -l) -eq 1 ] ; then echo SUCCESS ; else echo FAILURE ; fi' sleep 1 umount -l /mnt/b wait %% $ unshare -Urm ./locked_mounts_test.sh This failure is corrected by removing the prepass that marks mounts that may be umounted. A first pass is added that umounts mounts if possible and if not sets mount mark if they could be unmounted if they weren't locked and adds them to a list to umount possibilities. This first pass reconsiders the mounts parent if it is on the list of umount possibilities, ensuring that information of umoutability will pass from child to mount parent. A second pass then walks through all mounts that are umounted and processes their children unmounting them or marking them for reparenting. A last pass cleans up the state on the mounts that could not be umounted and if applicable reparents them to their first parent that remained mounted. While a bit longer than the old code this code is much more robust as it allows information to flow up from the leaves and down from the trunk making the order in which mounts are encountered in the umount propgation tree irrelevant. Fixes: 0c56fe31420c ("mnt: Don't propagate unmounts to locked mounts") Reviewed-by: Andrei Vagin Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index 4c7174d2041e..ec4078d16eb7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -237,7 +237,7 @@ static struct mount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); INIT_HLIST_NODE(&mnt->mnt_mp_list); - INIT_LIST_HEAD(&mnt->mnt_reparent); + INIT_LIST_HEAD(&mnt->mnt_umounting); #ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); #endif -- cgit v1.2.3