Skip to content

Commit 4009132

Browse files
committed
Merge branch 'work.mount-syscalls' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull mount ABI updates from Al Viro:

"The syscalls themselves, finally. That's not all there is to that stuff, but switching individual filesystems to new methods is fortunately independent from everything else, so e.g. NFS series can go through NFS tree, etc.

As those conversions get done, we'll be finally able to get rid of a bunch of duplication in fs/super.c introduced in the beginning of the entire thing. I expect that to be finished in the next window..."

* 'work.mount-syscalls' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  - vfs: Add a sample program for the new mount API
  - vfs: syscall: Add fspick() to select a superblock for reconfiguration
  - vfs: syscall: Add fsmount() to create a mount for a superblock
  - vfs: syscall: Add fsconfig() for configuring and managing a context
  - vfs: Implement logging through fs_context
  - vfs: syscall: Add fsopen() to prepare for superblock creation
  - Make anon_inodes unconditional
  - teach move_mount(2) to work with OPEN_TREE_CLONE
  - vfs: syscall: Add move_mount(2) to move mounts around
  - vfs: syscall: Add open_tree(2) to reference or clone a mount
2 parents: d27fb65 + f1b5618; commit: 4009132

File tree

22 files changed, with 1353 additions (+1353) and 91 deletions (-91).

22 files changed, with 1353 additions (+1353) and 91 deletions (-91).

arch/x86/entry/syscalls/syscall_32.tbl

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -398,7 +398,12 @@
398398
384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl
399399
385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents
400400
386 i386 rseq sys_rseq __ia32_sys_rseq
401-
# don't use numbers 387 through 392, add new calls at the end
401+
387 i386 open_tree sys_open_tree __ia32_sys_open_tree
402+
388 i386 move_mount sys_move_mount __ia32_sys_move_mount
403+
389 i386 fsopen sys_fsopen __ia32_sys_fsopen
404+
390 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig
405+
391 i386 fsmount sys_fsmount __ia32_sys_fsmount
406+
392 i386 fspick sys_fspick __ia32_sys_fspick
402407
393 i386 semget sys_semget __ia32_sys_semget
403408
394 i386 semctl sys_semctl __ia32_compat_sys_semctl
404409
395 i386 shmget sys_shmget __ia32_sys_shmget

arch/x86/entry/syscalls/syscall_64.tbl

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -343,6 +343,12 @@
343343
332 common statx __x64_sys_statx
344344
333 common io_pgetevents __x64_sys_io_pgetevents
345345
334 common rseq __x64_sys_rseq
346+
335 common open_tree __x64_sys_open_tree
347+
336 common move_mount __x64_sys_move_mount
348+
337 common fsopen __x64_sys_fsopen
349+
338 common fsconfig __x64_sys_fsconfig
350+
339 common fsmount __x64_sys_fsmount
351+
340 common fspick __x64_sys_fspick
346352
# don't use numbers 387 through 423, add new calls after the last
347353
# 'common' entry
348354
424 common pidfd_send_signal __x64_sys_pidfd_send_signal

fs/Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ obj-y := open.o read_write.o file_table.o super.o \
1313
seq_file.o xattr.o libfs.o fs-writeback.o \
1414
pnode.o splice.o sync.o utimes.o d_path.o \
1515
stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
16-
fs_types.o fs_context.o fs_parser.o
16+
fs_types.o fs_context.o fs_parser.o fsopen.o
1717

1818
ifeq ($(CONFIG_BLOCK),y)
1919
obj-y += buffer.o block_dev.o direct-io.o mpage.o

fs/file_table.c

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -255,6 +255,7 @@ static void __fput(struct file *file)
255255
struct dentry *dentry = file->f_path.dentry;
256256
struct vfsmount *mnt = file->f_path.mnt;
257257
struct inode *inode = file->f_inode;
258+
fmode_t mode = file->f_mode;
258259

259260
if (unlikely(!(file->f_mode & FMODE_OPENED)))
260261
goto out;
@@ -277,18 +278,20 @@ static void __fput(struct file *file)
277278
if (file->f_op->release)
278279
file->f_op->release(inode, file);
279280
if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
280-
!(file->f_mode & FMODE_PATH))) {
281+
!(mode & FMODE_PATH))) {
281282
cdev_put(inode->i_cdev);
282283
}
283284
fops_put(file->f_op);
284285
put_pid(file->f_owner.pid);
285-
if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
286+
if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
286287
i_readcount_dec(inode);
287-
if (file->f_mode & FMODE_WRITER) {
288+
if (mode & FMODE_WRITER) {
288289
put_write_access(inode);
289290
__mnt_drop_write(mnt);
290291
}
291292
dput(dentry);
293+
if (unlikely(mode & FMODE_NEED_UNMOUNT))
294+
dissolve_on_fput(mnt);
292295
mntput(mnt);
293296
out:
294297
file_free(file);

fs/fs_context.c

Lines changed: 146 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
*/
1212

1313
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14+
#include <linux/module.h>
1415
#include <linux/fs_context.h>
1516
#include <linux/fs_parser.h>
1617
#include <linux/fs.h>
@@ -23,6 +24,7 @@
2324
#include <linux/pid_namespace.h>
2425
#include <linux/user_namespace.h>
2526
#include <net/net_namespace.h>
27+
#include <asm/sections.h>
2628
#include "mount.h"
2729
#include "internal.h"
2830

@@ -271,6 +273,8 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
271273
fc->cred = get_current_cred();
272274
fc->net_ns = get_net(current->nsproxy->net_ns);
273275

276+
mutex_init(&fc->uapi_mutex);
277+
274278
switch (purpose) {
275279
case FS_CONTEXT_FOR_MOUNT:
276280
fc->user_ns = get_user_ns(fc->cred->user_ns);
@@ -353,6 +357,8 @@ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
353357
if (!fc)
354358
return ERR_PTR(-ENOMEM);
355359

360+
mutex_init(&fc->uapi_mutex);
361+
356362
fc->fs_private = NULL;
357363
fc->s_fs_info = NULL;
358364
fc->source = NULL;
@@ -361,6 +367,8 @@ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
361367
get_net(fc->net_ns);
362368
get_user_ns(fc->user_ns);
363369
get_cred(fc->cred);
370+
if (fc->log)
371+
refcount_inc(&fc->log->usage);
364372

365373
/* Can't call put until we've called ->dup */
366374
ret = fc->ops->dup(fc, src_fc);
@@ -378,35 +386,107 @@ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
378386
}
379387
EXPORT_SYMBOL(vfs_dup_fs_context);
380388

381-
#ifdef CONFIG_PRINTK
382389
/**
383390
* logfc - Log a message to a filesystem context
384391
* @fc: The filesystem context to log to.
385392
* @fmt: The format of the buffer.
386393
*/
387394
void logfc(struct fs_context *fc, const char *fmt, ...)
388395
{
396+
static const char store_failure[] = "OOM: Can't store error string";
397+
struct fc_log *log = fc ? fc->log : NULL;
398+
const char *p;
389399
va_list va;
400+
char *q;
401+
u8 freeable;
390402

391403
va_start(va, fmt);
392-
393-
switch (fmt[0]) {
394-
case 'w':
395-
vprintk_emit(0, LOGLEVEL_WARNING, NULL, 0, fmt, va);
396-
break;
397-
case 'e':
398-
vprintk_emit(0, LOGLEVEL_ERR, NULL, 0, fmt, va);
399-
break;
400-
default:
401-
vprintk_emit(0, LOGLEVEL_NOTICE, NULL, 0, fmt, va);
402-
break;
404+
if (!strchr(fmt, '%')) {
405+
p = fmt;
406+
goto unformatted_string;
407+
}
408+
if (strcmp(fmt, "%s") == 0) {
409+
p = va_arg(va, const char *);
410+
goto unformatted_string;
403411
}
404412

405-
pr_cont("\n");
413+
q = kvasprintf(GFP_KERNEL, fmt, va);
414+
copied_string:
415+
if (!q)
416+
goto store_failure;
417+
freeable = 1;
418+
goto store_string;
419+
420+
unformatted_string:
421+
if ((unsigned long)p >= (unsigned long)__start_rodata &&
422+
(unsigned long)p < (unsigned long)__end_rodata)
423+
goto const_string;
424+
if (log && within_module_core((unsigned long)p, log->owner))
425+
goto const_string;
426+
q = kstrdup(p, GFP_KERNEL);
427+
goto copied_string;
428+
429+
store_failure:
430+
p = store_failure;
431+
const_string:
432+
q = (char *)p;
433+
freeable = 0;
434+
store_string:
435+
if (!log) {
436+
switch (fmt[0]) {
437+
case 'w':
438+
printk(KERN_WARNING "%s\n", q + 2);
439+
break;
440+
case 'e':
441+
printk(KERN_ERR "%s\n", q + 2);
442+
break;
443+
default:
444+
printk(KERN_NOTICE "%s\n", q + 2);
445+
break;
446+
}
447+
if (freeable)
448+
kfree(q);
449+
} else {
450+
unsigned int logsize = ARRAY_SIZE(log->buffer);
451+
u8 index;
452+
453+
index = log->head & (logsize - 1);
454+
BUILD_BUG_ON(sizeof(log->head) != sizeof(u8) ||
455+
sizeof(log->tail) != sizeof(u8));
456+
if ((u8)(log->head - log->tail) == logsize) {
457+
/* The buffer is full, discard the oldest message */
458+
if (log->need_free & (1 << index))
459+
kfree(log->buffer[index]);
460+
log->tail++;
461+
}
462+
463+
log->buffer[index] = q;
464+
log->need_free &= ~(1 << index);
465+
log->need_free |= freeable << index;
466+
log->head++;
467+
}
406468
va_end(va);
407469
}
408470
EXPORT_SYMBOL(logfc);
409-
#endif
471+
472+
/*
473+
* Free a logging structure.
474+
*/
475+
static void put_fc_log(struct fs_context *fc)
476+
{
477+
struct fc_log *log = fc->log;
478+
int i;
479+
480+
if (log) {
481+
if (refcount_dec_and_test(&log->usage)) {
482+
fc->log = NULL;
483+
for (i = 0; i <= 7; i++)
484+
if (log->need_free & (1 << i))
485+
kfree(log->buffer[i]);
486+
kfree(log);
487+
}
488+
}
489+
}
410490

411491
/**
412492
* put_fs_context - Dispose of a superblock configuration context.
@@ -431,6 +511,7 @@ void put_fs_context(struct fs_context *fc)
431511
put_user_ns(fc->user_ns);
432512
put_cred(fc->cred);
433513
kfree(fc->subtype);
514+
put_fc_log(fc);
434515
put_filesystem(fc->fs_type);
435516
kfree(fc->source);
436517
kfree(fc);
@@ -640,3 +721,54 @@ int parse_monolithic_mount_data(struct fs_context *fc, void *data)
640721

641722
return monolithic_mount_data(fc, data);
642723
}
724+
725+
/*
726+
* Clean up a context after performing an action on it and put it into a state
727+
* from where it can be used to reconfigure a superblock.
728+
*
729+
* Note that here we do only the parts that can't fail; the rest is in
730+
* finish_clean_context() below and in between those fs_context is marked
731+
* FS_CONTEXT_AWAITING_RECONF. The reason for splitup is that after
732+
* successful mount or remount we need to report success to userland.
733+
* Trying to do full reinit (for the sake of possible subsequent remount)
734+
* and failing to allocate memory would've put us into a nasty situation.
735+
* So here we only discard the old state and reinitialization is left
736+
* until we actually try to reconfigure.
737+
*/
738+
void vfs_clean_context(struct fs_context *fc)
739+
{
740+
if (fc->need_free && fc->ops && fc->ops->free)
741+
fc->ops->free(fc);
742+
fc->need_free = false;
743+
fc->fs_private = NULL;
744+
fc->s_fs_info = NULL;
745+
fc->sb_flags = 0;
746+
security_free_mnt_opts(&fc->security);
747+
kfree(fc->subtype);
748+
fc->subtype = NULL;
749+
kfree(fc->source);
750+
fc->source = NULL;
751+
752+
fc->purpose = FS_CONTEXT_FOR_RECONFIGURE;
753+
fc->phase = FS_CONTEXT_AWAITING_RECONF;
754+
}
755+
756+
int finish_clean_context(struct fs_context *fc)
757+
{
758+
int error;
759+
760+
if (fc->phase != FS_CONTEXT_AWAITING_RECONF)
761+
return 0;
762+
763+
if (fc->fs_type->init_fs_context)
764+
error = fc->fs_type->init_fs_context(fc);
765+
else
766+
error = legacy_init_fs_context(fc);
767+
if (unlikely(error)) {
768+
fc->phase = FS_CONTEXT_FAILED;
769+
return error;
770+
}
771+
fc->need_free = true;
772+
fc->phase = FS_CONTEXT_RECONF_PARAMS;
773+
return 0;
774+
}

0 commit comments

Comments
 (0)