Skip to content

Commit 7b7b1ac

Browse files
Al ViroLinus Torvalds
authored andcommitted
[PATCH] saner handling of auto_acct_off() and DQUOT_OFF() in umount
The way we currently deal with quota and process accounting that might keep vfsmount busy at umount time is inherently broken; we try to turn them off just in case (not quite correctly, at that) and a) pray umount doesn't fail (otherwise they'll stay turned off) b) pray nobody doesn anything funny just as we turn quota off Moreover, LSM provides hooks for doing the same sort of broken logics. The proper way to deal with that is to introduce the second kind of reference to vfsmount. Semantics: - when the last normal reference is dropped, all special ones are converted to normal ones and if there had been any, cleanup is done. - normal reference can be cloned into a special one - special reference can be converted to normal one; that's a no-op if we'd already passed the point of no return (i.e. mntput() had converted special references to normal and started cleanup). The way it works: e.g. starting process accounting converts the vfsmount reference pinned by the opened file into special one and turns it back to normal when it gets shut down; acct_auto_close() is done when no normal references are left. That way it does *not* obstruct umount(2) and it silently gets turned off when the last normal reference to vfsmount is gone. Which is exactly what we want... The same should be done by LSM module that holds some internal references to vfsmount and wants to shut them down on umount - it should make them special and security_sb_umount_close() will be called exactly when the last normal reference to vfsmount is gone. quota handling is even simpler - we don't use normal file IO anymore, so there's no need to hold vfsmounts at all. DQUOT_OFF() is done from deactivate_super(), where it really belongs. Signed-off-by: Al Viro <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 254ce8d commit 7b7b1ac

File tree

7 files changed

+113
-79
lines changed

7 files changed

+113
-79
lines changed

fs/dquot.c

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1321,13 +1321,11 @@ int vfs_quota_off(struct super_block *sb, int type)
13211321
int cnt;
13221322
struct quota_info *dqopt = sb_dqopt(sb);
13231323
struct inode *toputinode[MAXQUOTAS];
1324-
struct vfsmount *toputmnt[MAXQUOTAS];
13251324

13261325
/* We need to serialize quota_off() for device */
13271326
down(&dqopt->dqonoff_sem);
13281327
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
13291328
toputinode[cnt] = NULL;
1330-
toputmnt[cnt] = NULL;
13311329
if (type != -1 && cnt != type)
13321330
continue;
13331331
if (!sb_has_quota_enabled(sb, cnt))
@@ -1348,20 +1346,15 @@ int vfs_quota_off(struct super_block *sb, int type)
13481346
put_quota_format(dqopt->info[cnt].dqi_format);
13491347

13501348
toputinode[cnt] = dqopt->files[cnt];
1351-
toputmnt[cnt] = dqopt->mnt[cnt];
13521349
dqopt->files[cnt] = NULL;
1353-
dqopt->mnt[cnt] = NULL;
13541350
dqopt->info[cnt].dqi_flags = 0;
13551351
dqopt->info[cnt].dqi_igrace = 0;
13561352
dqopt->info[cnt].dqi_bgrace = 0;
13571353
dqopt->ops[cnt] = NULL;
13581354
}
13591355
up(&dqopt->dqonoff_sem);
13601356
/* Sync the superblock so that buffers with quota data are written to
1361-
* disk (and so userspace sees correct data afterwards).
1362-
* The reference to vfsmnt we are still holding protects us from
1363-
* umount (we don't have it only when quotas are turned on/off for
1364-
* journal replay but in that case we are guarded by the fs anyway). */
1357+
* disk (and so userspace sees correct data afterwards). */
13651358
if (sb->s_op->sync_fs)
13661359
sb->s_op->sync_fs(sb, 1);
13671360
sync_blockdev(sb->s_bdev);
@@ -1385,10 +1378,6 @@ int vfs_quota_off(struct super_block *sb, int type)
13851378
iput(toputinode[cnt]);
13861379
}
13871380
up(&dqopt->dqonoff_sem);
1388-
/* We don't hold the reference when we turned on quotas
1389-
* just for the journal replay... */
1390-
if (toputmnt[cnt])
1391-
mntput(toputmnt[cnt]);
13921381
}
13931382
if (sb->s_bdev)
13941383
invalidate_bdev(sb->s_bdev, 0);
@@ -1503,11 +1492,8 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path)
15031492
/* Quota file not on the same filesystem? */
15041493
if (nd.mnt->mnt_sb != sb)
15051494
error = -EXDEV;
1506-
else {
1495+
else
15071496
error = vfs_quota_on_inode(nd.dentry->d_inode, type, format_id);
1508-
if (!error)
1509-
sb_dqopt(sb)->mnt[type] = mntget(nd.mnt);
1510-
}
15111497
out_path:
15121498
path_release(&nd);
15131499
return error;

fs/namespace.c

Lines changed: 41 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -172,15 +172,54 @@ clone_mnt(struct vfsmount *old, struct dentry *root)
172172
return mnt;
173173
}
174174

175-
void __mntput(struct vfsmount *mnt)
175+
static inline void __mntput(struct vfsmount *mnt)
176176
{
177177
struct super_block *sb = mnt->mnt_sb;
178178
dput(mnt->mnt_root);
179179
free_vfsmnt(mnt);
180180
deactivate_super(sb);
181181
}
182182

183-
EXPORT_SYMBOL(__mntput);
183+
void mntput_no_expire(struct vfsmount *mnt)
184+
{
185+
repeat:
186+
if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) {
187+
if (likely(!mnt->mnt_pinned)) {
188+
spin_unlock(&vfsmount_lock);
189+
__mntput(mnt);
190+
return;
191+
}
192+
atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
193+
mnt->mnt_pinned = 0;
194+
spin_unlock(&vfsmount_lock);
195+
acct_auto_close_mnt(mnt);
196+
security_sb_umount_close(mnt);
197+
goto repeat;
198+
}
199+
}
200+
201+
EXPORT_SYMBOL(mntput_no_expire);
202+
203+
void mnt_pin(struct vfsmount *mnt)
204+
{
205+
spin_lock(&vfsmount_lock);
206+
mnt->mnt_pinned++;
207+
spin_unlock(&vfsmount_lock);
208+
}
209+
210+
EXPORT_SYMBOL(mnt_pin);
211+
212+
void mnt_unpin(struct vfsmount *mnt)
213+
{
214+
spin_lock(&vfsmount_lock);
215+
if (mnt->mnt_pinned) {
216+
atomic_inc(&mnt->mnt_count);
217+
mnt->mnt_pinned--;
218+
}
219+
spin_unlock(&vfsmount_lock);
220+
}
221+
222+
EXPORT_SYMBOL(mnt_unpin);
184223

185224
/* iterator */
186225
static void *m_start(struct seq_file *m, loff_t *pos)
@@ -435,16 +474,6 @@ static int do_umount(struct vfsmount *mnt, int flags)
435474
down_write(&current->namespace->sem);
436475
spin_lock(&vfsmount_lock);
437476

438-
if (atomic_read(&sb->s_active) == 1) {
439-
/* last instance - try to be smart */
440-
spin_unlock(&vfsmount_lock);
441-
lock_kernel();
442-
DQUOT_OFF(sb);
443-
acct_auto_close(sb);
444-
unlock_kernel();
445-
security_sb_umount_close(mnt);
446-
spin_lock(&vfsmount_lock);
447-
}
448477
retval = -EBUSY;
449478
if (atomic_read(&mnt->mnt_count) == 2 || flags & MNT_DETACH) {
450479
if (!list_empty(&mnt->mnt_list))
@@ -850,17 +879,6 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts)
850879
detach_mnt(mnt, &old_nd);
851880
spin_unlock(&vfsmount_lock);
852881
path_release(&old_nd);
853-
854-
/*
855-
* Now lay it to rest if this was the last ref on the superblock
856-
*/
857-
if (atomic_read(&mnt->mnt_sb->s_active) == 1) {
858-
/* last instance - try to be smart */
859-
lock_kernel();
860-
DQUOT_OFF(mnt->mnt_sb);
861-
acct_auto_close(mnt->mnt_sb);
862-
unlock_kernel();
863-
}
864882
mntput(mnt);
865883
} else {
866884
/*

fs/super.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ void deactivate_super(struct super_block *s)
171171
if (atomic_dec_and_lock(&s->s_active, &sb_lock)) {
172172
s->s_count -= S_BIAS-1;
173173
spin_unlock(&sb_lock);
174+
DQUOT_OFF(s);
174175
down_write(&s->s_umount);
175176
fs->kill_sb(s);
176177
put_filesystem(fs);

include/linux/acct.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,12 +117,15 @@ struct acct_v3
117117
#include <linux/config.h>
118118

119119
#ifdef CONFIG_BSD_PROCESS_ACCT
120+
struct vfsmount;
120121
struct super_block;
122+
extern void acct_auto_close_mnt(struct vfsmount *m);
121123
extern void acct_auto_close(struct super_block *sb);
122124
extern void acct_process(long exitcode);
123125
extern void acct_update_integrals(struct task_struct *tsk);
124126
extern void acct_clear_integrals(struct task_struct *tsk);
125127
#else
128+
#define acct_auto_close_mnt(x) do { } while (0)
126129
#define acct_auto_close(x) do { } while (0)
127130
#define acct_process(x) do { } while (0)
128131
#define acct_update_integrals(x) do { } while (0)

include/linux/mount.h

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ struct vfsmount
3737
struct list_head mnt_list;
3838
struct list_head mnt_expire; /* link in fs-specific expiry list */
3939
struct namespace *mnt_namespace; /* containing namespace */
40+
int mnt_pinned;
4041
};
4142

4243
static inline struct vfsmount *mntget(struct vfsmount *mnt)
@@ -46,15 +47,9 @@ static inline struct vfsmount *mntget(struct vfsmount *mnt)
4647
return mnt;
4748
}
4849

49-
extern void __mntput(struct vfsmount *mnt);
50-
51-
static inline void mntput_no_expire(struct vfsmount *mnt)
52-
{
53-
if (mnt) {
54-
if (atomic_dec_and_test(&mnt->mnt_count))
55-
__mntput(mnt);
56-
}
57-
}
50+
extern void mntput_no_expire(struct vfsmount *mnt);
51+
extern void mnt_pin(struct vfsmount *mnt);
52+
extern void mnt_unpin(struct vfsmount *mnt);
5853

5954
static inline void mntput(struct vfsmount *mnt)
6055
{

include/linux/quota.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,6 @@ struct quota_info {
289289
struct semaphore dqonoff_sem; /* Serialize quotaon & quotaoff */
290290
struct rw_semaphore dqptr_sem; /* serialize ops using quota_info struct, pointers from inode to dquots */
291291
struct inode *files[MAXQUOTAS]; /* inodes of quotafiles */
292-
struct vfsmount *mnt[MAXQUOTAS]; /* mountpoint entries of filesystems with quota files */
293292
struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */
294293
struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */
295294
};

kernel/acct.c

Lines changed: 62 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
#include <linux/jiffies.h>
5555
#include <linux/times.h>
5656
#include <linux/syscalls.h>
57+
#include <linux/mount.h>
5758
#include <asm/uaccess.h>
5859
#include <asm/div64.h>
5960
#include <linux/blkdev.h> /* sector_div */
@@ -192,13 +193,50 @@ static void acct_file_reopen(struct file *file)
192193
add_timer(&acct_globals.timer);
193194
}
194195
if (old_acct) {
196+
mnt_unpin(old_acct->f_vfsmnt);
195197
spin_unlock(&acct_globals.lock);
196198
do_acct_process(0, old_acct);
197199
filp_close(old_acct, NULL);
198200
spin_lock(&acct_globals.lock);
199201
}
200202
}
201203

204+
static int acct_on(char *name)
205+
{
206+
struct file *file;
207+
int error;
208+
209+
/* Difference from BSD - they don't do O_APPEND */
210+
file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
211+
if (IS_ERR(file))
212+
return PTR_ERR(file);
213+
214+
if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
215+
filp_close(file, NULL);
216+
return -EACCES;
217+
}
218+
219+
if (!file->f_op->write) {
220+
filp_close(file, NULL);
221+
return -EIO;
222+
}
223+
224+
error = security_acct(file);
225+
if (error) {
226+
filp_close(file, NULL);
227+
return error;
228+
}
229+
230+
spin_lock(&acct_globals.lock);
231+
mnt_pin(file->f_vfsmnt);
232+
acct_file_reopen(file);
233+
spin_unlock(&acct_globals.lock);
234+
235+
mntput(file->f_vfsmnt); /* it's pinned, now give up active reference */
236+
237+
return 0;
238+
}
239+
202240
/**
203241
* sys_acct - enable/disable process accounting
204242
* @name: file name for accounting records or NULL to shutdown accounting
@@ -212,47 +250,41 @@ static void acct_file_reopen(struct file *file)
212250
*/
213251
asmlinkage long sys_acct(const char __user *name)
214252
{
215-
struct file *file = NULL;
216-
char *tmp;
217253
int error;
218254

219255
if (!capable(CAP_SYS_PACCT))
220256
return -EPERM;
221257

222258
if (name) {
223-
tmp = getname(name);
224-
if (IS_ERR(tmp)) {
259+
char *tmp = getname(name);
260+
if (IS_ERR(tmp))
225261
return (PTR_ERR(tmp));
226-
}
227-
/* Difference from BSD - they don't do O_APPEND */
228-
file = filp_open(tmp, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
262+
error = acct_on(tmp);
229263
putname(tmp);
230-
if (IS_ERR(file)) {
231-
return (PTR_ERR(file));
232-
}
233-
if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
234-
filp_close(file, NULL);
235-
return (-EACCES);
236-
}
237-
238-
if (!file->f_op->write) {
239-
filp_close(file, NULL);
240-
return (-EIO);
264+
} else {
265+
error = security_acct(NULL);
266+
if (!error) {
267+
spin_lock(&acct_globals.lock);
268+
acct_file_reopen(NULL);
269+
spin_unlock(&acct_globals.lock);
241270
}
242271
}
272+
return error;
273+
}
243274

244-
error = security_acct(file);
245-
if (error) {
246-
if (file)
247-
filp_close(file, NULL);
248-
return error;
249-
}
250-
275+
/**
276+
* acct_auto_close - turn off a filesystem's accounting if it is on
277+
* @m: vfsmount being shut down
278+
*
279+
* If the accounting is turned on for a file in the subtree pointed to
280+
* to by m, turn accounting off. Done when m is about to die.
281+
*/
282+
void acct_auto_close_mnt(struct vfsmount *m)
283+
{
251284
spin_lock(&acct_globals.lock);
252-
acct_file_reopen(file);
285+
if (acct_globals.file && acct_globals.file->f_vfsmnt == m)
286+
acct_file_reopen(NULL);
253287
spin_unlock(&acct_globals.lock);
254-
255-
return (0);
256288
}
257289

258290
/**
@@ -266,8 +298,8 @@ void acct_auto_close(struct super_block *sb)
266298
{
267299
spin_lock(&acct_globals.lock);
268300
if (acct_globals.file &&
269-
acct_globals.file->f_dentry->d_inode->i_sb == sb) {
270-
acct_file_reopen((struct file *)NULL);
301+
acct_globals.file->f_vfsmnt->mnt_sb == sb) {
302+
acct_file_reopen(NULL);
271303
}
272304
spin_unlock(&acct_globals.lock);
273305
}

0 commit comments

Comments
 (0)