Skip to content

Commit 6faddda

Browse files
chucklever authored and brauner committed
libfs: Add directory operations for stable offsets
Create a vector of directory operations in fs/libfs.c that handles directory seeks and readdir via stable offsets instead of the current cursor-based mechanism. For the moment these are unused. Signed-off-by: Chuck Lever <[email protected]> Message-Id: <168814732984.530310.11190772066786107220.stgit@manet.1015granger.net> Signed-off-by: Christian Brauner <[email protected]>
1 parent 509f006 commit 6faddda

File tree

4 files changed

+276
-4
lines changed

4 files changed

+276
-4
lines changed

Documentation/filesystems/locking.rst

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,13 +85,14 @@ prototypes::
8585
struct dentry *dentry, struct fileattr *fa);
8686
int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
8787
struct posix_acl * (*get_acl)(struct mnt_idmap *, struct dentry *, int);
88+
struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
8889

8990
locking rules:
9091
all may block
9192

92-
============== =============================================
93+
============== ==================================================
9394
ops i_rwsem(inode)
94-
============== =============================================
95+
============== ==================================================
9596
lookup: shared
9697
create: exclusive
9798
link: exclusive (both)
@@ -115,7 +116,8 @@ atomic_open: shared (exclusive if O_CREAT is set in open flags)
115116
tmpfile: no
116117
fileattr_get: no or exclusive
117118
fileattr_set: exclusive
118-
============== =============================================
119+
get_offset_ctx no
120+
============== ==================================================
119121

120122

121123
Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem

Documentation/filesystems/vfs.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,7 @@ As of kernel 2.6.22, the following members are defined:
515515
int (*fileattr_set)(struct mnt_idmap *idmap,
516516
struct dentry *dentry, struct fileattr *fa);
517517
int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
518+
struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
518519
};
519520
520521
Again, all methods are called without any locks being held, unless
@@ -675,7 +676,10 @@ otherwise noted.
675676
called on ioctl(FS_IOC_SETFLAGS) and ioctl(FS_IOC_FSSETXATTR) to
676677
change miscellaneous file flags and attributes. Callers hold
677678
i_rwsem exclusive. If unset, then fall back to f_op->ioctl().
678-
679+
``get_offset_ctx``
680+
called to get the offset context for a directory inode. A
681+
filesystem must define this operation to use
682+
simple_offset_dir_operations.
679683

680684
The Address Space Object
681685
========================

fs/libfs.c

Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,254 @@ const struct inode_operations simple_dir_inode_operations = {
239239
};
240240
EXPORT_SYMBOL(simple_dir_inode_operations);
241241

242+
/* Store @offset in @dentry's d_fsdata field, widened to pointer size. */
static void offset_set(struct dentry *dentry, u32 offset)
243+
{
244+
dentry->d_fsdata = (void *)((uintptr_t)(offset));
245+
}
246+
247+
/* Return the value kept in @dentry's d_fsdata field, narrowed to a u32. */
static u32 dentry2offset(struct dentry *dentry)
248+
{
249+
return (u32)((uintptr_t)(dentry->d_fsdata));
250+
}
251+
252+
/**
253+
* simple_offset_init - initialize an offset_ctx
254+
* @octx: directory offset map to be initialized
255+
*
* Initializes the xarray embedded in @octx with XA_FLAGS_ALLOC1 and sets
* the next offset to be handed out to 2.
256+
*/
257+
void simple_offset_init(struct offset_ctx *octx)
258+
{
259+
xa_init_flags(&octx->xa, XA_FLAGS_ALLOC1);
260+
261+
/* 0 is '.', 1 is '..', so always start with offset 2 */
262+
octx->next_offset = 2;
263+
}
264+
265+
/**
265+
* simple_offset_add - Add an entry to a directory's offset map
266+
* @octx: directory offset ctx to be updated
267+
* @dentry: new dentry being added
268+
*
* An offset in the range [2, U32_MAX] is allocated for @dentry, starting
* the search at @octx->next_offset. A @dentry that already carries a
* non-zero offset is rejected with -EBUSY.
*
269+
* Returns zero on success. @octx and the dentry offset are updated.
270+
* Otherwise, a negative errno value is returned.
271+
*/
272+
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
273+
{
274+
static const struct xa_limit limit = XA_LIMIT(2, U32_MAX);
275+
u32 offset;
276+
int ret;
277+
278+
/* A non-zero stored offset means the dentry is already in a map. */
if (dentry2offset(dentry) != 0)
279+
return -EBUSY;
280+
281+
ret = xa_alloc_cyclic(&octx->xa, &offset, dentry, limit,
282+
&octx->next_offset, GFP_KERNEL);
283+
/* Only negative results are failures; any other value is success. */
if (ret < 0)
284+
return ret;
285+
286+
offset_set(dentry, offset);
287+
return 0;
288+
}
290+
291+
/**
291+
* simple_offset_remove - Remove an entry from a directory's offset map
292+
* @octx: directory offset ctx to be updated
293+
* @dentry: dentry being removed
294+
*
* Erases the map slot at @dentry's recorded offset and resets that offset
* to zero. A @dentry whose recorded offset is already zero is left
* untouched.
295+
*/
296+
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry)
297+
{
298+
u32 offset;
299+
300+
offset = dentry2offset(dentry);
301+
/* Zero means "no offset assigned": nothing to erase. */
if (offset == 0)
302+
return;
303+
304+
xa_erase(&octx->xa, offset);
305+
offset_set(dentry, 0);
306+
}
308+
309+
/**
309+
* simple_offset_rename_exchange - exchange rename with directory offsets
310+
* @old_dir: parent of dentry being moved
311+
* @old_dentry: dentry being moved
312+
* @new_dir: destination parent
313+
* @new_dentry: destination dentry
314+
*
* Both dentries are removed from their current offset maps and then added
* to the other directory's map via simple_offset_add(), so each receives a
* newly allocated offset rather than taking over the other's old one.
* Both directories must provide ->get_offset_ctx; it is called
* unconditionally.
*
315+
* Returns zero on success. Otherwise a negative errno is returned and the
316+
* rename is rolled back.
*
* NOTE(review): the rollback path ignores the return value of xa_store(),
* so a failed store would leave a dentry carrying an offset that is not in
* the map. It also stores at the saved index even when that index is 0
* (i.e. the dentry had no offset on entry) - confirm callers never pass
* such a dentry.
317+
*/
318+
int simple_offset_rename_exchange(struct inode *old_dir,
319+
struct dentry *old_dentry,
320+
struct inode *new_dir,
321+
struct dentry *new_dentry)
322+
{
323+
struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir);
324+
struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir);
325+
/* Saved so that out_restore can put both entries back where they were. */
u32 old_index = dentry2offset(old_dentry);
326+
u32 new_index = dentry2offset(new_dentry);
327+
int ret;
328+
329+
/* Removal zeroes each dentry's offset, which simple_offset_add() requires. */
simple_offset_remove(old_ctx, old_dentry);
330+
simple_offset_remove(new_ctx, new_dentry);
331+
332+
ret = simple_offset_add(new_ctx, old_dentry);
333+
if (ret)
334+
goto out_restore;
335+
336+
ret = simple_offset_add(old_ctx, new_dentry);
337+
if (ret) {
338+
/* Undo the first add before restoring the original slots. */
simple_offset_remove(new_ctx, old_dentry);
339+
goto out_restore;
340+
}
341+
342+
ret = simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
343+
if (ret) {
344+
/* Undo both adds before restoring the original slots. */
simple_offset_remove(new_ctx, old_dentry);
345+
simple_offset_remove(old_ctx, new_dentry);
346+
goto out_restore;
347+
}
348+
return 0;
349+
350+
out_restore:
351+
offset_set(old_dentry, old_index);
352+
xa_store(&old_ctx->xa, old_index, old_dentry, GFP_KERNEL);
353+
offset_set(new_dentry, new_index);
354+
xa_store(&new_ctx->xa, new_index, new_dentry, GFP_KERNEL);
355+
return ret;
356+
}
358+
359+
/**
359+
* simple_offset_destroy - Release offset map
360+
* @octx: directory offset ctx that is about to be destroyed
361+
*
362+
* During fs teardown (eg. umount), a directory's offset map might still
363+
* contain entries. xa_destroy() cleans out anything that remains.
*
* This function only calls xa_destroy() on the map; it does not itself
* reset the offsets recorded in any dentries.
364+
*/
365+
void simple_offset_destroy(struct offset_ctx *octx)
366+
{
367+
xa_destroy(&octx->xa);
368+
}
370+
371+
/**
371+
* offset_dir_llseek - Advance the read position of a directory descriptor
372+
* @file: an open directory whose position is to be updated
373+
* @offset: the new position for SEEK_SET, or a delta that is added to the
*          current position for SEEK_CUR
374+
* @whence: enumerator describing the starting position for this update
375+
*
376+
* SEEK_END, SEEK_DATA, and SEEK_HOLE are not supported for directories.
* Any @whence other than SEEK_SET or SEEK_CUR, and any resulting position
* that is negative, yields -EINVAL.
377+
*
378+
* Returns the updated read position if successful; otherwise a
379+
* negative errno is returned and the read position remains unchanged.
380+
*/
381+
static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
382+
{
383+
switch (whence) {
384+
case SEEK_CUR:
385+
offset += file->f_pos;
386+
fallthrough;
387+
case SEEK_SET:
388+
if (offset >= 0)
389+
break;
390+
/* Negative target: fall into the error return below. */
fallthrough;
391+
default:
392+
return -EINVAL;
393+
}
394+
395+
/* U32_MAX is passed as the maximum position, matching the u32 offsets. */
return vfs_setpos(file, offset, U32_MAX);
396+
}
398+
399+
/*
 * Advance @xas to the next entry in the offset map (up to index U32_MAX)
 * and return that dentry, obtained via dget_dlock() under its d_lock, if
 * simple_positive() accepts it. Callers in this file dput() the result.
 *
 * Returns NULL when there is no further entry, and also when the next
 * entry is not positive.
 *
 * NOTE(review): in the second case the caller cannot tell "end of map"
 * from "skipped entry" and stops iterating - confirm this is intended.
 */
static struct dentry *offset_find_next(struct xa_state *xas)
400+
{
401+
struct dentry *child, *found = NULL;
402+
403+
rcu_read_lock();
404+
child = xas_next_entry(xas, U32_MAX);
405+
if (!child)
406+
goto out;
407+
spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
408+
if (simple_positive(child))
409+
found = dget_dlock(child);
410+
spin_unlock(&child->d_lock);
411+
out:
412+
rcu_read_unlock();
413+
return found;
414+
}
415+
416+
/*
 * Hand one directory entry to @ctx->actor: @dentry's name and name length,
 * its recorded offset, and the inode number and d_type derived from its
 * inode. Returns whatever the actor returns; the caller in this file stops
 * iterating on false.
 */
static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
417+
{
418+
u32 offset = dentry2offset(dentry);
419+
struct inode *inode = d_inode(dentry);
420+
421+
return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset,
422+
inode->i_ino, fs_umode_to_dtype(inode->i_mode));
423+
}
424+
425+
/*
 * Walk @dir's offset map starting at index @ctx->pos and emit each dentry
 * found. The walk ends when offset_find_next() returns NULL or when the
 * actor refuses an entry. @dir's inode must provide ->get_offset_ctx; it is
 * called unconditionally.
 *
 * @ctx->pos is advanced to one past an entry's index only after that entry
 * has been emitted successfully, so a refused entry is offered again on the
 * next call.
 */
static void offset_iterate_dir(struct dentry *dir, struct dir_context *ctx)
426+
{
427+
struct inode *inode = d_inode(dir);
428+
struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
429+
XA_STATE(xas, &so_ctx->xa, ctx->pos);
430+
struct dentry *dentry;
431+
432+
while (true) {
433+
/* The lookup runs with the parent's d_lock held. */
spin_lock(&dir->d_lock);
434+
dentry = offset_find_next(&xas);
435+
spin_unlock(&dir->d_lock);
436+
if (!dentry)
437+
break;
438+
439+
if (!offset_dir_emit(ctx, dentry)) {
440+
/* Leave ctx->pos unchanged so this entry is retried. */
dput(dentry);
441+
break;
442+
}
443+
444+
dput(dentry);
445+
ctx->pos = xas.xa_index + 1;
446+
}
447+
}
448+
449+
/**
450+
* offset_readdir - Emit entries starting at offset @ctx->pos
451+
* @file: an open directory to iterate over
452+
* @ctx: directory iteration context
453+
*
454+
* Caller must hold the i_rwsem of @file's directory inode to prevent
455+
* insertion or removal of entries during this call.
456+
*
457+
* On entry, @ctx->pos contains an offset that represents the first entry
458+
* to be read from the directory.
459+
*
460+
* The operation continues until there are no more entries to read, or
461+
* until the ctx->actor indicates there is no more space in the caller's
462+
* output buffer.
463+
*
464+
* On return, @ctx->pos contains an offset that will read the next entry
465+
* in this directory when offset_readdir() is called again with @ctx.
466+
*
467+
* Return values:
468+
* %0 - Complete (also returned when emitting "." or ".." stops early)
469+
*/
470+
static int offset_readdir(struct file *file, struct dir_context *ctx)
471+
{
472+
struct dentry *dir = file->f_path.dentry;
473+
474+
lockdep_assert_held(&d_inode(dir)->i_rwsem);
475+
476+
if (!dir_emit_dots(file, ctx))
477+
return 0;
478+
479+
offset_iterate_dir(dir, ctx);
480+
return 0;
481+
}
482+
483+
/*
 * Directory file operations that seek and iterate using the per-directory
 * offset map. The readdir path calls the inode's ->get_offset_ctx
 * unconditionally, so a filesystem using this table must implement it.
 */
const struct file_operations simple_offset_dir_operations = {
484+
.llseek = offset_dir_llseek,
485+
.iterate_shared = offset_readdir,
486+
.read = generic_read_dir,
487+
.fsync = noop_fsync,
488+
};
489+
242490
static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
243491
{
244492
struct dentry *child = NULL;

include/linux/fs.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1770,6 +1770,7 @@ struct dir_context {
17701770

17711771
struct iov_iter;
17721772
struct io_uring_cmd;
1773+
struct offset_ctx;
17731774

17741775
struct file_operations {
17751776
struct module *owner;
@@ -1857,6 +1858,7 @@ struct inode_operations {
18571858
int (*fileattr_set)(struct mnt_idmap *idmap,
18581859
struct dentry *dentry, struct fileattr *fa);
18591860
int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
1861+
struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
18601862
} ____cacheline_aligned;
18611863

18621864
static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
@@ -2971,6 +2973,22 @@ extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
29712973
extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
29722974
const void __user *from, size_t count);
29732975

2976+
/* Per-directory state for stable directory offsets. */
struct offset_ctx {
2977+
/* Offset map; simple_offset_add() stores each dentry at its offset. */
struct xarray xa;
2978+
/* Allocation cursor handed to xa_alloc_cyclic(); initialized to 2. */
u32 next_offset;
2979+
};
2980+
2981+
void simple_offset_init(struct offset_ctx *octx);
2982+
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
2983+
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
2984+
int simple_offset_rename_exchange(struct inode *old_dir,
2985+
struct dentry *old_dentry,
2986+
struct inode *new_dir,
2987+
struct dentry *new_dentry);
2988+
void simple_offset_destroy(struct offset_ctx *octx);
2989+
2990+
extern const struct file_operations simple_offset_dir_operations;
2991+
29742992
extern int __generic_file_fsync(struct file *, loff_t, loff_t, int);
29752993
extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
29762994

0 commit comments

Comments
 (0)