Skip to content

Commit 24368a7

Browse files
author
Al Viro
committed
sanitize handling of long-term internal mounts
Original rationale for those had been the reduced cost of mntput() for the stuff that is mounted somewhere. Mount refcount increments and decrements are frequent; what's worse, they tend to concentrate on the same instances and cacheline pingpong is quite noticeable. As a result, mount refcounts are per-cpu; that allows a very cheap increment. Plain decrement would be just as easy, but decrement-and-test is anything but (we need to add the components up, with exclusion against possible increment-from-zero, etc.). Fortunately, there is a very common case where we can tell that decrement won't be the final one - if the thing we are dropping is currently mounted somewhere. We have an RCU delay between the removal from mount tree and dropping the reference that used to pin it there, so we can just take rcu_read_lock() and check if the victim is mounted somewhere. If it is, we can go ahead and decrement without any further checks - the reference we are dropping is not the last one. If it isn't, we get all the fun with locking, carefully adding up components, etc., but the majority of refcount decrements end up taking the fast path. There is a major exception, though - pipes and sockets. Those live on the internal filesystems that are not going to be mounted anywhere. They are not going to be _un_mounted, of course, so having to take the slow path every time a pipe or socket gets closed is really obnoxious. Solution had been to mark them as long-lived ones - essentially faking "they are mounted somewhere" indicator. With minor modification that works even for ones that do eventually get dropped - all it takes is making sure we have an RCU delay between clearing the "mounted somewhere" indicator and dropping the reference. 
There are some additional twists (if you want to drop a dozen of such internal mounts, you'd be better off with clearing the indicator on all of them, doing an RCU delay once, then dropping the references), but in the basic form it had been * use kern_mount() if you want your internal mount to be a long-term one. * use kern_unmount() to undo that. Unfortunately, things did rot a bit during the mount API reshuffling. In several cases we have lost the "fake the indicator" part; kern_unmount() on the unmount side remained (it doesn't warn if you use it on a mount without the indicator), but all benefits regarding mntput() cost had been lost. To get rid of that bitrot, let's add a new helper that would work with fs_context-based API: fc_mount_longterm(). It's a counterpart of fc_mount() that does, on success, mark its result as long-term. It must be paired with kern_unmount() or equivalents. Converted: 1) mqueue (it used to use kern_mount_data() and the umount side is still as it used to be) 2) hugetlbfs (used to use kern_mount_data(), internal mount is never unmounted in this one) 3) i915 gemfs (used to be kern_mount() + manual remount to set options, still uses kern_unmount() on umount side) 4) v3d gemfs (copied from i915) Reviewed-by: Christian Brauner <[email protected]> Signed-off-by: Al Viro <[email protected]>
1 parent c93ff74 commit 24368a7

File tree

6 files changed

+48
-8
lines changed

6 files changed

+48
-8
lines changed

drivers/gpu/drm/i915/gem/i915_gemfs.c

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,23 @@
55

66
#include <linux/fs.h>
77
#include <linux/mount.h>
8+
#include <linux/fs_context.h>
89

910
#include "i915_drv.h"
1011
#include "i915_gemfs.h"
1112
#include "i915_utils.h"
1213

14+
static int add_param(struct fs_context *fc, const char *key, const char *val)
15+
{
16+
return vfs_parse_fs_string(fc, key, val, strlen(val));
17+
}
18+
1319
void i915_gemfs_init(struct drm_i915_private *i915)
1420
{
15-
char huge_opt[] = "huge=within_size"; /* r/w */
1621
struct file_system_type *type;
22+
struct fs_context *fc;
1723
struct vfsmount *gemfs;
24+
int ret;
1825

1926
/*
2027
* By creating our own shmemfs mountpoint, we can pass in
@@ -38,8 +45,16 @@ void i915_gemfs_init(struct drm_i915_private *i915)
3845
if (!type)
3946
goto err;
4047

41-
gemfs = vfs_kern_mount(type, SB_KERNMOUNT, type->name, huge_opt);
42-
if (IS_ERR(gemfs))
48+
fc = fs_context_for_mount(type, SB_KERNMOUNT);
49+
if (IS_ERR(fc))
50+
goto err;
51+
ret = add_param(fc, "source", "tmpfs");
52+
if (!ret)
53+
ret = add_param(fc, "huge", "within_size");
54+
if (!ret)
55+
gemfs = fc_mount_longterm(fc);
56+
put_fs_context(fc);
57+
if (ret)
4358
goto err;
4459

4560
i915->mm.gemfs = gemfs;

drivers/gpu/drm/v3d/v3d_gemfs.c

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,21 @@
33

44
#include <linux/fs.h>
55
#include <linux/mount.h>
6+
#include <linux/fs_context.h>
67

78
#include "v3d_drv.h"
89

10+
static int add_param(struct fs_context *fc, const char *key, const char *val)
11+
{
12+
return vfs_parse_fs_string(fc, key, val, strlen(val));
13+
}
14+
915
void v3d_gemfs_init(struct v3d_dev *v3d)
1016
{
11-
char huge_opt[] = "huge=within_size";
1217
struct file_system_type *type;
18+
struct fs_context *fc;
1319
struct vfsmount *gemfs;
20+
int ret;
1421

1522
/*
1623
* By creating our own shmemfs mountpoint, we can pass in
@@ -28,8 +35,16 @@ void v3d_gemfs_init(struct v3d_dev *v3d)
2835
if (!type)
2936
goto err;
3037

31-
gemfs = vfs_kern_mount(type, SB_KERNMOUNT, type->name, huge_opt);
32-
if (IS_ERR(gemfs))
38+
fc = fs_context_for_mount(type, SB_KERNMOUNT);
39+
if (IS_ERR(fc))
40+
goto err;
41+
ret = add_param(fc, "source", "tmpfs");
42+
if (!ret)
43+
ret = add_param(fc, "huge", "within_size");
44+
if (!ret)
45+
gemfs = fc_mount_longterm(fc);
46+
put_fs_context(fc);
47+
if (ret)
3348
goto err;
3449

3550
v3d->gemfs = gemfs;

fs/hugetlbfs/inode.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1587,7 +1587,7 @@ static struct vfsmount *__init mount_one_hugetlbfs(struct hstate *h)
15871587
} else {
15881588
struct hugetlbfs_fs_context *ctx = fc->fs_private;
15891589
ctx->hstate = h;
1590-
mnt = fc_mount(fc);
1590+
mnt = fc_mount_longterm(fc);
15911591
put_fs_context(fc);
15921592
}
15931593
if (IS_ERR(mnt))

fs/namespace.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1260,6 +1260,15 @@ struct vfsmount *fc_mount(struct fs_context *fc)
12601260
}
12611261
EXPORT_SYMBOL(fc_mount);
12621262

1263+
struct vfsmount *fc_mount_longterm(struct fs_context *fc)
1264+
{
1265+
struct vfsmount *mnt = fc_mount(fc);
1266+
if (!IS_ERR(mnt))
1267+
real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
1268+
return mnt;
1269+
}
1270+
EXPORT_SYMBOL(fc_mount_longterm);
1271+
12631272
struct vfsmount *vfs_kern_mount(struct file_system_type *type,
12641273
int flags, const char *name,
12651274
void *data)

include/linux/mount.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ int mnt_get_write_access(struct vfsmount *mnt);
9898
void mnt_put_write_access(struct vfsmount *mnt);
9999

100100
extern struct vfsmount *fc_mount(struct fs_context *fc);
101+
extern struct vfsmount *fc_mount_longterm(struct fs_context *fc);
101102
extern struct vfsmount *vfs_create_mount(struct fs_context *fc);
102103
extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
103104
int flags, const char *name,

ipc/mqueue.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,7 @@ static struct vfsmount *mq_create_mount(struct ipc_namespace *ns)
482482
put_user_ns(fc->user_ns);
483483
fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns);
484484

485-
mnt = fc_mount(fc);
485+
mnt = fc_mount_longterm(fc);
486486
put_fs_context(fc);
487487
return mnt;
488488
}

0 commit comments

Comments
 (0)