Skip to content

Commit d08fa7f

Browse files
author
Al Viro
committed
don't set MNT_LOCKED on parentless mounts
Originally MNT_LOCKED meant only one thing - "don't let this mount to be peeled off its parent, we don't want to have its mountpoint exposed". Accordingly, it had only been set on mounts that *do* have a parent. Later it got overloaded with another use - setting it on the absolute root had given free protection against umount(2) of absolute root (was possible to trigger, oopsed). Not a bad trick, but it ended up costing more than it bought us. Unfortunately, the cost included both hard-to-reason-about logics and a subtle race between mount -o remount,ro and mount --[r]bind - lockless &= ~MNT_LOCKED in the end of __do_loopback() could race with sb_prepare_remount_readonly() setting and clearing MNT_HOLD_WRITE (under mount_lock, as it should be). The race wouldn't be much of a problem (there are other ways to deal with it), but the subtlety is. Turns out that nobody except umount(2) had ever made use of having MNT_LOCKED set on absolute root. So let's give up on that trick, clever as it had been, add an explicit check in do_umount() and return to using MNT_LOCKED only for mounts that have a parent. It means that * clone_mnt() no longer copies MNT_LOCKED * copy_tree() sets it on submounts if their counterparts had been marked such, and does that right next to attach_mnt() in there, in the same mount_lock scope. * __do_loopback() no longer needs to strip MNT_LOCKED off the root of subtree it's about to return; no store, no race. * init_mount_tree() doesn't bother setting MNT_LOCKED on absolute root. * lock_mnt_tree() does not set MNT_LOCKED on the subtree's root; accordingly, its caller (loop in attach_recursive_mnt()) does not need to bother stripping that MNT_LOCKED on root. Note that lock_mnt_tree() setting MNT_LOCKED on submounts happens in the same mount_lock scope as __attach_mnt() (from commit_tree()) that makes them reachable. Reviewed-by: Christian Brauner <[email protected]> Signed-off-by: Al Viro <[email protected]>
1 parent 1a867d7 commit d08fa7f

File tree

1 file changed

+15
-17
lines changed

1 file changed

+15
-17
lines changed

fs/namespace.c

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1313,7 +1313,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
13131313
}
13141314

13151315
mnt->mnt.mnt_flags = old->mnt.mnt_flags;
1316-
mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
1316+
mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL|MNT_LOCKED);
13171317

13181318
atomic_inc(&sb->s_active);
13191319
mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt));
@@ -1988,6 +1988,9 @@ static int do_umount(struct mount *mnt, int flags)
19881988
if (mnt->mnt.mnt_flags & MNT_LOCKED)
19891989
goto out;
19901990

1991+
if (!mnt_has_parent(mnt)) /* not the absolute root */
1992+
goto out;
1993+
19911994
event++;
19921995
if (flags & MNT_DETACH) {
19931996
if (mnt_ns_attached(mnt) || !list_empty(&mnt->mnt_list))
@@ -2257,6 +2260,8 @@ struct mount *copy_tree(struct mount *src_root, struct dentry *dentry,
22572260
if (IS_ERR(dst_mnt))
22582261
goto out;
22592262
lock_mount_hash();
2263+
if (src_mnt->mnt.mnt_flags & MNT_LOCKED)
2264+
dst_mnt->mnt.mnt_flags |= MNT_LOCKED;
22602265
list_add_tail(&dst_mnt->mnt_list, &res->mnt_list);
22612266
attach_mnt(dst_mnt, dst_parent, src_parent->mnt_mp);
22622267
unlock_mount_hash();
@@ -2489,7 +2494,7 @@ static void lock_mnt_tree(struct mount *mnt)
24892494
if (flags & MNT_NOEXEC)
24902495
flags |= MNT_LOCK_NOEXEC;
24912496
/* Don't allow unprivileged users to reveal what is under a mount */
2492-
if (list_empty(&p->mnt_expire))
2497+
if (list_empty(&p->mnt_expire) && p != mnt)
24932498
flags |= MNT_LOCKED;
24942499
p->mnt.mnt_flags = flags;
24952500
}
@@ -2704,7 +2709,6 @@ static int attach_recursive_mnt(struct mount *source_mnt,
27042709
/* Notice when we are propagating across user namespaces */
27052710
if (child->mnt_parent->mnt_ns->user_ns != user_ns)
27062711
lock_mnt_tree(child);
2707-
child->mnt.mnt_flags &= ~MNT_LOCKED;
27082712
q = __lookup_mnt(&child->mnt_parent->mnt,
27092713
child->mnt_mountpoint);
27102714
if (q) {
@@ -2985,26 +2989,21 @@ static inline bool may_copy_tree(struct path *path)
29852989

29862990
static struct mount *__do_loopback(struct path *old_path, int recurse)
29872991
{
2988-
struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt);
2992+
struct mount *old = real_mount(old_path->mnt);
29892993

29902994
if (IS_MNT_UNBINDABLE(old))
2991-
return mnt;
2995+
return ERR_PTR(-EINVAL);
29922996

29932997
if (!may_copy_tree(old_path))
2994-
return mnt;
2998+
return ERR_PTR(-EINVAL);
29952999

29963000
if (!recurse && __has_locked_children(old, old_path->dentry))
2997-
return mnt;
3001+
return ERR_PTR(-EINVAL);
29983002

29993003
if (recurse)
3000-
mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
3004+
return copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
30013005
else
3002-
mnt = clone_mnt(old, old_path->dentry, 0);
3003-
3004-
if (!IS_ERR(mnt))
3005-
mnt->mnt.mnt_flags &= ~MNT_LOCKED;
3006-
3007-
return mnt;
3006+
return clone_mnt(old, old_path->dentry, 0);
30083007
}
30093008

30103009
/*
@@ -4749,11 +4748,11 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
47494748
if (!path_mounted(&root))
47504749
goto out4; /* not a mountpoint */
47514750
if (!mnt_has_parent(root_mnt))
4752-
goto out4; /* not attached */
4751+
goto out4; /* absolute root */
47534752
if (!path_mounted(&new))
47544753
goto out4; /* not a mountpoint */
47554754
if (!mnt_has_parent(new_mnt))
4756-
goto out4; /* not attached */
4755+
goto out4; /* absolute root */
47574756
/* make sure we can reach put_old from new_root */
47584757
if (!is_path_reachable(old_mnt, old.dentry, &new))
47594758
goto out4;
@@ -6154,7 +6153,6 @@ static void __init init_mount_tree(void)
61546153

61556154
root.mnt = mnt;
61566155
root.dentry = mnt->mnt_root;
6157-
mnt->mnt_flags |= MNT_LOCKED;
61586156

61596157
set_fs_pwd(current->fs, &root);
61606158
set_fs_root(current->fs, &root);

0 commit comments

Comments
 (0)