Skip to content

Commit 0b05b18

Browse files
avati authored and Miklos Szeredi committed
fuse: implement NFS-like readdirplus support
This patch implements readdirplus support in FUSE, similar to NFS. The payload returned in the readdirplus call contains a 'fuse_entry_out' structure, thereby providing all the necessary inputs for 'faking' a lookup() operation on the spot. If the dentry and inode already exist (e.g. in a re-run of ls -l), then just the inode attribute timeout and the dentry timeout are refreshed. With a simple client->network->server implementation of a FUSE based filesystem, the following performance observations were made: Test: Performing a filesystem crawl over 20,000 files with sh# time ls -lR /mnt Without readdirplus: Run 1: 18.1s Run 2: 16.0s Run 3: 16.2s With readdirplus: Run 1: 4.1s Run 2: 3.8s Run 3: 3.8s The performance improvement is significant because it avoids 20,000 lookup upcalls. Cache consistency is no worse than it already is. Signed-off-by: Anand V. Avati <[email protected]> Signed-off-by: Miklos Szeredi <[email protected]>
1 parent ff7532c commit 0b05b18

File tree

5 files changed

+197
-5
lines changed

5 files changed

+197
-5
lines changed

fs/fuse/dev.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,25 @@ void fuse_request_send_background_locked(struct fuse_conn *fc,
491491
fuse_request_send_nowait_locked(fc, req);
492492
}
493493

494+
/*
 * Send a FUSE_FORGET with nlookup == 1 for @nodeid on the connection that
 * owns the directory behind @file.
 *
 * Used by READDIRPLUS to give back a lookup reference for an entry that
 * could not be linked into the dcache.
 *
 * @file:   open directory the READDIRPLUS reply was received on
 * @nodeid: node ID whose lookup count should be dropped by one
 */
void fuse_force_forget(struct file *file, u64 nodeid)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_req *req;
	struct fuse_forget_in inarg;

	memset(&inarg, 0, sizeof(inarg));
	inarg.nlookup = 1;
	req = fuse_get_req_nofail(fc, file);
	req->in.h.opcode = FUSE_FORGET;
	req->in.h.nodeid = nodeid;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;
	req->isreply = 0;
	/*
	 * The request argument points at 'inarg', which lives in this stack
	 * frame.  Queueing the request without waiting would let this
	 * function return while the request still references that memory, so
	 * send it synchronously (dev.c's waiting send helper) and only then
	 * drop our reference to the request.
	 */
	__fuse_request_send(fc, req);
	/* ignore errors */
	fuse_put_request(fc, req);
}
512+
494513
/*
495514
* Lock the request. Up to the next unlock_request() there mustn't be
496515
* anything that could cause a page-fault. If the request was already

fs/fuse/dir.c

Lines changed: 156 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1155,6 +1155,143 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
11551155
return 0;
11561156
}
11571157

1158+
static int fuse_direntplus_link(struct file *file,
1159+
struct fuse_direntplus *direntplus,
1160+
u64 attr_version)
1161+
{
1162+
int err;
1163+
struct fuse_entry_out *o = &direntplus->entry_out;
1164+
struct fuse_dirent *dirent = &direntplus->dirent;
1165+
struct dentry *parent = file->f_path.dentry;
1166+
struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
1167+
struct dentry *dentry;
1168+
struct dentry *alias;
1169+
struct inode *dir = parent->d_inode;
1170+
struct fuse_conn *fc;
1171+
struct inode *inode;
1172+
1173+
if (!o->nodeid) {
1174+
/*
1175+
* Unlike in the case of fuse_lookup, zero nodeid does not mean
1176+
* ENOENT. Instead, it only means the userspace filesystem did
1177+
* not want to return attributes/handle for this entry.
1178+
*
1179+
* So do nothing.
1180+
*/
1181+
return 0;
1182+
}
1183+
1184+
if (name.name[0] == '.') {
1185+
/*
1186+
* We could potentially refresh the attributes of the directory
1187+
* and its parent?
1188+
*/
1189+
if (name.len == 1)
1190+
return 0;
1191+
if (name.name[1] == '.' && name.len == 2)
1192+
return 0;
1193+
}
1194+
fc = get_fuse_conn(dir);
1195+
1196+
name.hash = full_name_hash(name.name, name.len);
1197+
dentry = d_lookup(parent, &name);
1198+
if (dentry && dentry->d_inode) {
1199+
inode = dentry->d_inode;
1200+
if (get_node_id(inode) == o->nodeid) {
1201+
struct fuse_inode *fi;
1202+
fi = get_fuse_inode(inode);
1203+
spin_lock(&fc->lock);
1204+
fi->nlookup++;
1205+
spin_unlock(&fc->lock);
1206+
1207+
/*
1208+
* The other branch to 'found' comes via fuse_iget()
1209+
* which bumps nlookup inside
1210+
*/
1211+
goto found;
1212+
}
1213+
err = d_invalidate(dentry);
1214+
if (err)
1215+
goto out;
1216+
dput(dentry);
1217+
dentry = NULL;
1218+
}
1219+
1220+
dentry = d_alloc(parent, &name);
1221+
err = -ENOMEM;
1222+
if (!dentry)
1223+
goto out;
1224+
1225+
inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
1226+
&o->attr, entry_attr_timeout(o), attr_version);
1227+
if (!inode)
1228+
goto out;
1229+
1230+
alias = d_materialise_unique(dentry, inode);
1231+
err = PTR_ERR(alias);
1232+
if (IS_ERR(alias))
1233+
goto out;
1234+
if (alias) {
1235+
dput(dentry);
1236+
dentry = alias;
1237+
}
1238+
1239+
found:
1240+
fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o),
1241+
attr_version);
1242+
1243+
fuse_change_entry_timeout(dentry, o);
1244+
1245+
err = 0;
1246+
out:
1247+
if (dentry)
1248+
dput(dentry);
1249+
return err;
1250+
}
1251+
1252+
/*
 * Walk the records of a READDIRPLUS reply held in @buf (@nbytes bytes).
 *
 * Each record is handed to @filldir until it reports that @dstbuf cannot
 * take more, and every record, emitted or not, is passed to
 * fuse_direntplus_link().  A record that fails to link gets a FORGET sent
 * for its nodeid.
 *
 * Returns -EIO if a record has a zero or over-long name, 0 otherwise.  A
 * trailing record that does not fit in the remaining bytes ends the walk.
 */
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
			     void *dstbuf, filldir_t filldir, u64 attr_version)
{
	int full = 0;

	for (;;) {
		struct fuse_direntplus *entry;
		struct fuse_dirent *de;
		size_t entry_len;

		/* Not even a record header left: done. */
		if (nbytes < FUSE_NAME_OFFSET_DIRENTPLUS)
			break;

		entry = (struct fuse_direntplus *) buf;
		de = &entry->dirent;
		entry_len = FUSE_DIRENTPLUS_SIZE(entry);

		if (de->namelen == 0 || de->namelen > FUSE_NAME_MAX)
			return -EIO;
		if (nbytes < entry_len)
			break;

		/*
		 * Only fill dstbuf for as long as it accepts entries, but
		 * keep walking the rest of the reply so the remaining
		 * entries still get linked; otherwise a FORGET would be
		 * needed for each one that was not linked.
		 */
		if (!full) {
			full = filldir(dstbuf, de->name, de->namelen,
				       file->f_pos, de->ino, de->type);
			file->f_pos = de->off;
		}

		if (fuse_direntplus_link(file, entry, attr_version))
			fuse_force_forget(file, entry->entry_out.nodeid);

		buf += entry_len;
		nbytes -= entry_len;
	}

	return 0;
}
1294+
11581295
static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
11591296
{
11601297
int err;
@@ -1163,6 +1300,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
11631300
struct inode *inode = file->f_path.dentry->d_inode;
11641301
struct fuse_conn *fc = get_fuse_conn(inode);
11651302
struct fuse_req *req;
1303+
u64 attr_version = 0;
11661304

11671305
if (is_bad_inode(inode))
11681306
return -EIO;
@@ -1179,14 +1317,28 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
11791317
req->out.argpages = 1;
11801318
req->num_pages = 1;
11811319
req->pages[0] = page;
1182-
fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR);
1320+
if (fc->do_readdirplus) {
1321+
attr_version = fuse_get_attr_version(fc);
1322+
fuse_read_fill(req, file, file->f_pos, PAGE_SIZE,
1323+
FUSE_READDIRPLUS);
1324+
} else {
1325+
fuse_read_fill(req, file, file->f_pos, PAGE_SIZE,
1326+
FUSE_READDIR);
1327+
}
11831328
fuse_request_send(fc, req);
11841329
nbytes = req->out.args[0].size;
11851330
err = req->out.h.error;
11861331
fuse_put_request(fc, req);
1187-
if (!err)
1188-
err = parse_dirfile(page_address(page), nbytes, file, dstbuf,
1189-
filldir);
1332+
if (!err) {
1333+
if (fc->do_readdirplus) {
1334+
err = parse_dirplusfile(page_address(page), nbytes,
1335+
file, dstbuf, filldir,
1336+
attr_version);
1337+
} else {
1338+
err = parse_dirfile(page_address(page), nbytes, file,
1339+
dstbuf, filldir);
1340+
}
1341+
}
11901342

11911343
__free_page(page);
11921344
fuse_invalidate_attr(inode); /* atime changed */

fs/fuse/fuse_i.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,9 @@ struct fuse_conn {
487487
/** Use enhanced/automatic page cache invalidation. */
488488
unsigned auto_inval_data:1;
489489

490+
/** Does the filesystem support readdir-plus? */
491+
unsigned do_readdirplus:1;
492+
490493
/** The number of requests waiting for completion */
491494
atomic_t num_waiting;
492495

@@ -578,6 +581,9 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
578581

579582
struct fuse_forget_link *fuse_alloc_forget(void);
580583

584+
/* Used by READDIRPLUS */
585+
void fuse_force_forget(struct file *file, u64 nodeid);
586+
581587
/**
582588
* Initialize READ or READDIR request
583589
*/

fs/fuse/inode.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -863,6 +863,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
863863
fc->dont_mask = 1;
864864
if (arg->flags & FUSE_AUTO_INVAL_DATA)
865865
fc->auto_inval_data = 1;
866+
if (arg->flags & FUSE_DO_READDIRPLUS)
867+
fc->do_readdirplus = 1;
866868
} else {
867869
ra_pages = fc->max_read / PAGE_CACHE_SIZE;
868870
fc->no_lock = 1;
@@ -889,7 +891,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
889891
arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
890892
FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
891893
FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
892-
FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA;
894+
FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
895+
FUSE_DO_READDIRPLUS;
893896
req->in.h.opcode = FUSE_INIT;
894897
req->in.numargs = 1;
895898
req->in.args[0].size = sizeof(*arg);

include/uapi/linux/fuse.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ struct fuse_file_lock {
193193
#define FUSE_FLOCK_LOCKS (1 << 10)
194194
#define FUSE_HAS_IOCTL_DIR (1 << 11)
195195
#define FUSE_AUTO_INVAL_DATA (1 << 12)
196+
#define FUSE_DO_READDIRPLUS (1 << 13)
196197

197198
/**
198199
* CUSE INIT request/reply flags
@@ -299,6 +300,7 @@ enum fuse_opcode {
299300
FUSE_NOTIFY_REPLY = 41,
300301
FUSE_BATCH_FORGET = 42,
301302
FUSE_FALLOCATE = 43,
303+
FUSE_READDIRPLUS = 44,
302304

303305
/* CUSE specific operations */
304306
CUSE_INIT = 4096,
@@ -630,6 +632,16 @@ struct fuse_dirent {
630632
#define FUSE_DIRENT_SIZE(d) \
631633
FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
632634

635+
/*
 * One record of a READDIRPLUS reply: the lookup-style entry information
 * for a directory entry, followed by the plain directory entry itself.
 * The entry name is the 'name' member of the embedded fuse_dirent.
 */
struct fuse_direntplus {
636+
struct fuse_entry_out entry_out;
637+
struct fuse_dirent dirent;
638+
};
639+
640+
/* Byte offset of the entry name from the start of a fuse_direntplus. */
#define FUSE_NAME_OFFSET_DIRENTPLUS \
641+
offsetof(struct fuse_direntplus, dirent.name)
642+
/*
 * Size of the fuse_direntplus record (d): name offset plus name length,
 * passed through FUSE_DIRENT_ALIGN.
 */
#define FUSE_DIRENTPLUS_SIZE(d) \
643+
FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen)
644+
633645
struct fuse_notify_inval_inode_out {
634646
__u64 ino;
635647
__s64 off;

0 commit comments

Comments
 (0)