use uniform permission checks for all mount propagation changes

PlaidCat · PlaidCat · commit 0b2de89735e6 · 2025-10-03T14:41:19.000-04:00
jira LE-4311 cve CVE-2025-38498 Rebuild_History Non-Buildable kernel-5.14.0-570.46.1.el9_6 commit-author Al Viro <viro@zeniv.linux.org.uk> commit cffd044 Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. Ref for failed cherry-pick at: ciq/ciq_backports/kernel-5.14.0-570.46.1.el9_6/cffd0441.failed do_change_type() and do_set_group() are operating on different aspects of the same thing - propagation graph. The latter asks for mounts involved to be mounted in namespace(s) the caller has CAP_SYS_ADMIN for. The former is a mess - originally it didn't even check that mount *is* mounted. That got fixed, but the resulting check turns out to be too strict for userland - in effect, we check that mount is in our namespace, having already checked that we have CAP_SYS_ADMIN there. What we really need (in both cases) is * only touch mounts that are mounted. That's a must-have constraint - data corruption happens if it get violated. * don't allow to mess with a namespace unless you already have enough permissions to do so (i.e. CAP_SYS_ADMIN in its userns). That's an equivalent of what do_set_group() does; let's extract that into a helper (may_change_propagation()) and use it in both do_set_group() and do_change_type(). Fixes: 12f147d "do_change_type(): refuse to operate on unmounted/not ours mounts" Acked-by: Andrei Vagin <avagin@gmail.com> Reviewed-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com> Tested-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com> Reviewed-by: Christian Brauner <brauner@kernel.org> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> (cherry picked from commit cffd044) Signed-off-by: Jonathan Maple <jmaple@ciq.com> # Conflicts: # fs/namespace.c
diff --git a/ciq/ciq_backports/kernel-5.14.0-570.46.1.el9_6/cffd0441.failed b/ciq/ciq_backports/kernel-5.14.0-570.46.1.el9_6/cffd0441.failed
@@ -0,0 +1,328 @@
+use uniform permission checks for all mount propagation changes
+
+jira LE-4311
+cve CVE-2025-38498
+Rebuild_History Non-Buildable kernel-5.14.0-570.46.1.el9_6
+commit-author Al Viro <viro@zeniv.linux.org.uk>
+commit cffd0441872e7f6b1fce5e78fb1c99187a291330
+Empty-Commit: Cherry-Pick Conflicts during history rebuild.
+Will be included in final tarball splat. Ref for failed cherry-pick at:
+ciq/ciq_backports/kernel-5.14.0-570.46.1.el9_6/cffd0441.failed
+
+do_change_type() and do_set_group() are operating on different
+aspects of the same thing - propagation graph.  The latter
+asks for mounts involved to be mounted in namespace(s) the caller
+has CAP_SYS_ADMIN for.  The former is a mess - originally it
+didn't even check that mount *is* mounted.  That got fixed,
+but the resulting check turns out to be too strict for userland -
+in effect, we check that mount is in our namespace, having already
+checked that we have CAP_SYS_ADMIN there.
+
+What we really need (in both cases) is
+	* only touch mounts that are mounted.  That's a must-have
+constraint - data corruption happens if it gets violated.
+	* don't allow messing with a namespace unless you already
+have enough permissions to do so (i.e. CAP_SYS_ADMIN in its userns).
+
+That's an equivalent of what do_set_group() does; let's extract that
+into a helper (may_change_propagation()) and use it in both
+do_set_group() and do_change_type().
+
+Fixes: 12f147ddd6de "do_change_type(): refuse to operate on unmounted/not ours mounts"
+	Acked-by: Andrei Vagin <avagin@gmail.com>
+	Reviewed-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+	Tested-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+	Reviewed-by: Christian Brauner <brauner@kernel.org>
+	Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+(cherry picked from commit cffd0441872e7f6b1fce5e78fb1c99187a291330)
+	Signed-off-by: Jonathan Maple <jmaple@ciq.com>
+
+# Conflicts:
+#	fs/namespace.c
+diff --cc fs/namespace.c
+index 4ec9c03ab924,88db58061919..000000000000
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@@ -2278,9 -2856,22 +2278,22 @@@ static int graft_tree(struct mount *mnt
+  	      d_is_dir(mnt->mnt.mnt_root))
+  		return -ENOTDIR;
+  
+ -	return attach_recursive_mnt(mnt, p, mp);
+ +	return attach_recursive_mnt(mnt, p, mp, false);
+  }
+  
++ static int may_change_propagation(const struct mount *m)
++ {
++         struct mnt_namespace *ns = m->mnt_ns;
++ 
++ 	 // it must be mounted in some namespace
++ 	 if (IS_ERR_OR_NULL(ns))         // is_mounted()
++ 		 return -EINVAL;
++ 	 // and the caller must be admin in userns of that namespace
++ 	 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
++ 		 return -EPERM;
++ 	 return 0;
++ }
++ 
+  /*
+   * Sanity check the flags to change_mnt_propagation.
+   */
+@@@ -2671,29 -3347,238 +2684,121 @@@ static inline int tree_contains_unbinda
+  	return 0;
+  }
+  
+++<<<<<<< HEAD
+++=======
++ static int do_set_group(struct path *from_path, struct path *to_path)
++ {
++ 	struct mount *from, *to;
++ 	int err;
++ 
++ 	from = real_mount(from_path->mnt);
++ 	to = real_mount(to_path->mnt);
++ 
++ 	namespace_lock();
++ 
++ 	err = may_change_propagation(from);
++ 	if (err)
++ 		goto out;
++ 	err = may_change_propagation(to);
++ 	if (err)
++ 		goto out;
++ 
++ 	err = -EINVAL;
++ 	/* To and From paths should be mount roots */
++ 	if (!path_mounted(from_path))
++ 		goto out;
++ 	if (!path_mounted(to_path))
++ 		goto out;
++ 
++ 	/* Setting sharing groups is only allowed across same superblock */
++ 	if (from->mnt.mnt_sb != to->mnt.mnt_sb)
++ 		goto out;
++ 
++ 	/* From mount root should be wider than To mount root */
++ 	if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root))
++ 		goto out;
++ 
++ 	/* From mount should not have locked children in place of To's root */
++ 	if (__has_locked_children(from, to->mnt.mnt_root))
++ 		goto out;
++ 
++ 	/* Setting sharing groups is only allowed on private mounts */
++ 	if (IS_MNT_SHARED(to) || IS_MNT_SLAVE(to))
++ 		goto out;
++ 
++ 	/* From should not be private */
++ 	if (!IS_MNT_SHARED(from) && !IS_MNT_SLAVE(from))
++ 		goto out;
++ 
++ 	if (IS_MNT_SLAVE(from)) {
++ 		hlist_add_behind(&to->mnt_slave, &from->mnt_slave);
++ 		to->mnt_master = from->mnt_master;
++ 	}
++ 
++ 	if (IS_MNT_SHARED(from)) {
++ 		to->mnt_group_id = from->mnt_group_id;
++ 		list_add(&to->mnt_share, &from->mnt_share);
++ 		set_mnt_shared(to);
++ 	}
++ 
++ 	err = 0;
++ out:
++ 	namespace_unlock();
++ 	return err;
++ }
++ 
++ /**
++  * path_overmounted - check if path is overmounted
++  * @path: path to check
++  *
++  * Check if path is overmounted, i.e., if there's a mount on top of
++  * @path->mnt with @path->dentry as mountpoint.
++  *
++  * Context: namespace_sem must be held at least shared.
++  * MUST NOT be called under lock_mount_hash() (there one should just
++  * call __lookup_mnt() and check if it returns NULL).
++  * Return: If path is overmounted true is returned, false if not.
++  */
++ static inline bool path_overmounted(const struct path *path)
++ {
++ 	unsigned seq = read_seqbegin(&mount_lock);
++ 	bool no_child;
++ 
++ 	rcu_read_lock();
++ 	no_child = !__lookup_mnt(path->mnt, path->dentry);
++ 	rcu_read_unlock();
++ 	if (need_seqretry(&mount_lock, seq)) {
++ 		read_seqlock_excl(&mount_lock);
++ 		no_child = !__lookup_mnt(path->mnt, path->dentry);
++ 		read_sequnlock_excl(&mount_lock);
++ 	}
++ 	return unlikely(!no_child);
++ }
++ 
+++>>>>>>> cffd0441872e (use uniform permission checks for all mount propagation changes)
+  /*
+ - * Check if there is a possibly empty chain of descent from p1 to p2.
+ - * Locks: namespace_sem (shared) or mount_lock (read_seqlock_excl).
+ - */
+ -static bool mount_is_ancestor(const struct mount *p1, const struct mount *p2)
+ -{
+ -	while (p2 != p1 && mnt_has_parent(p2))
+ -		p2 = p2->mnt_parent;
+ -	return p2 == p1;
+ -}
+ -
+ -/**
+ - * can_move_mount_beneath - check that we can mount beneath the top mount
+ - * @from: mount to mount beneath
+ - * @to:   mount under which to mount
+ - * @mp:   mountpoint of @to
+ - *
+ - * - Make sure that @to->dentry is actually the root of a mount under
+ - *   which we can mount another mount.
+ - * - Make sure that nothing can be mounted beneath the caller's current
+ - *   root or the rootfs of the namespace.
+ - * - Make sure that the caller can unmount the topmost mount ensuring
+ - *   that the caller could reveal the underlying mountpoint.
+ - * - Ensure that nothing has been mounted on top of @from before we
+ - *   grabbed @namespace_sem to avoid creating pointless shadow mounts.
+ - * - Prevent mounting beneath a mount if the propagation relationship
+ - *   between the source mount, parent mount, and top mount would lead to
+ - *   nonsensical mount trees.
+ - *
+ - * Context: This function expects namespace_lock() to be held.
+ - * Return: On success 0, and on error a negative error code is returned.
+ - */
+ -static int can_move_mount_beneath(const struct path *from,
+ -				  const struct path *to,
+ -				  const struct mountpoint *mp)
+ -{
+ -	struct mount *mnt_from = real_mount(from->mnt),
+ -		     *mnt_to = real_mount(to->mnt),
+ -		     *parent_mnt_to = mnt_to->mnt_parent;
+ -
+ -	if (!mnt_has_parent(mnt_to))
+ -		return -EINVAL;
+ -
+ -	if (!path_mounted(to))
+ -		return -EINVAL;
+ -
+ -	if (IS_MNT_LOCKED(mnt_to))
+ -		return -EINVAL;
+ -
+ -	/* Avoid creating shadow mounts during mount propagation. */
+ -	if (path_overmounted(from))
+ -		return -EINVAL;
+ -
+ -	/*
+ -	 * Mounting beneath the rootfs only makes sense when the
+ -	 * semantics of pivot_root(".", ".") are used.
+ -	 */
+ -	if (&mnt_to->mnt == current->fs->root.mnt)
+ -		return -EINVAL;
+ -	if (parent_mnt_to == current->nsproxy->mnt_ns->root)
+ -		return -EINVAL;
+ -
+ -	if (mount_is_ancestor(mnt_to, mnt_from))
+ -		return -EINVAL;
+ -
+ -	/*
+ -	 * If the parent mount propagates to the child mount this would
+ -	 * mean mounting @mnt_from on @mnt_to->mnt_parent and then
+ -	 * propagating a copy @c of @mnt_from on top of @mnt_to. This
+ -	 * defeats the whole purpose of mounting beneath another mount.
+ -	 */
+ -	if (propagation_would_overmount(parent_mnt_to, mnt_to, mp))
+ -		return -EINVAL;
+ -
+ -	/*
+ -	 * If @mnt_to->mnt_parent propagates to @mnt_from this would
+ -	 * mean propagating a copy @c of @mnt_from on top of @mnt_from.
+ -	 * Afterwards @mnt_from would be mounted on top of
+ -	 * @mnt_to->mnt_parent and @mnt_to would be unmounted from
+ -	 * @mnt->mnt_parent and remounted on @mnt_from. But since @c is
+ -	 * already mounted on @mnt_from, @mnt_to would ultimately be
+ -	 * remounted on top of @c. Afterwards, @mnt_from would be
+ -	 * covered by a copy @c of @mnt_from and @c would be covered by
+ -	 * @mnt_from itself. This defeats the whole purpose of mounting
+ -	 * @mnt_from beneath @mnt_to.
+ -	 */
+ -	if (check_mnt(mnt_from) &&
+ -	    propagation_would_overmount(parent_mnt_to, mnt_from, mp))
+ -		return -EINVAL;
+ -
+ -	return 0;
+ -}
+ -
+ -/* may_use_mount() - check if a mount tree can be used
+ - * @mnt: vfsmount to be used
+ - *
+ - * This helper checks if the caller may use the mount tree starting
+ - * from @path->mnt. The caller may use the mount tree under the
+ - * following circumstances:
+ - *
+ - * (1) The caller is located in the mount namespace of the mount tree.
+ - *     This also implies that the mount does not belong to an anonymous
+ - *     mount namespace.
+ - * (2) The caller is trying to use a mount tree that belongs to an
+ - *     anonymous mount namespace.
+ - *
+ - *     For that to be safe, this helper enforces that the origin mount
+ - *     namespace the anonymous mount namespace was created from is the
+ - *     same as the caller's mount namespace by comparing the sequence
+ - *     numbers.
+ - *
+ - *     The ownership of a non-anonymous mount namespace such as the
+ - *     caller's cannot change.
+ - *     => We know that the caller's mount namespace is stable.
+ - *
+ - *     If the origin sequence number of the anonymous mount namespace is
+ - *     the same as the sequence number of the caller's mount namespace.
+ - *     => The owning namespaces are the same.
+ - *
+ - *     ==> The earlier capability check on the owning namespace of the
+ - *         caller's mount namespace ensures that the caller has the
+ - *         ability to use the mount tree.
+ - *
+ - * Returns true if the mount tree can be used, false otherwise.
+ + * Check that there aren't references to earlier/same mount namespaces in the
+ + * specified subtree.  Such references can act as pins for mount namespaces
+ + * that aren't checked by the mount-cycle checking code, thereby allowing
+ + * cycles to be made.
+   */
+ -static inline bool may_use_mount(struct mount *mnt)
+ +static bool check_for_nsfs_mounts(struct mount *subtree)
+  {
+ -	if (check_mnt(mnt))
+ -		return true;
+ +	struct mount *p;
+ +	bool ret = false;
+  
+ -	/*
+ -	 * Make sure that noone unmounted the target path or somehow
+ -	 * managed to get their hands on something purely kernel
+ -	 * internal.
+ -	 */
+ -	if (!is_mounted(&mnt->mnt))
+ -		return false;
+ +	lock_mount_hash();
+ +	for (p = subtree; p; p = next_mnt(p, subtree))
+ +		if (mnt_ns_loop(p->mnt.mnt_root))
+ +			goto out;
+  
+ -	return check_anonymous_mnt(mnt);
+ +	ret = true;
+ +out:
+ +	unlock_mount_hash();
+ +	return ret;
+  }
+  
+ -static int do_move_mount(struct path *old_path,
+ -			 struct path *new_path, enum mnt_tree_flags_t flags)
+ +static int do_move_mount(struct path *old_path, struct path *new_path)
+  {
+  	struct mnt_namespace *ns;
+  	struct mount *p;
+* Unmerged path fs/namespace.c