Skip to content

Commit fd9a8d7

Browse files
Trond Myklebust authored and committed
NFSv4.1: Fix bulk recall and destroy of layouts
The current code in pnfs_destroy_all_layouts() assumes that removing the layout from the server->layouts list is sufficient to make it invisible to other processes. This ignores the fact that most users access the layout through the nfs_inode->layout... There is further breakage due to lack of reference counting of the layouts, meaning that the whole thing Oopses at the drop of a hat.

The code in initiate_bulk_draining() is almost correct, and can be used as a model for pnfs_destroy_all_layouts(), so move that code to pnfs.c, and refactor the code to allow us to choose between a single filesystem bulk recall, and a recall of all layouts.

Also note that initiate_bulk_draining() currently calls iput() while holding locks. Fix that too.

Signed-off-by: Trond Myklebust <[email protected]>
Cc: [email protected]
1 parent c8da19b commit fd9a8d7

File tree

3 files changed: +144 −74 lines changed

fs/nfs/callback_proc.c

Lines changed: 8 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -183,60 +183,15 @@ static u32 initiate_file_draining(struct nfs_client *clp,
183183
static u32 initiate_bulk_draining(struct nfs_client *clp,
184184
struct cb_layoutrecallargs *args)
185185
{
186-
struct nfs_server *server;
187-
struct pnfs_layout_hdr *lo;
188-
struct inode *ino;
189-
u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
190-
struct pnfs_layout_hdr *tmp;
191-
LIST_HEAD(recall_list);
192-
LIST_HEAD(free_me_list);
193-
struct pnfs_layout_range range = {
194-
.iomode = IOMODE_ANY,
195-
.offset = 0,
196-
.length = NFS4_MAX_UINT64,
197-
};
198-
199-
spin_lock(&clp->cl_lock);
200-
rcu_read_lock();
201-
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
202-
if ((args->cbl_recall_type == RETURN_FSID) &&
203-
memcmp(&server->fsid, &args->cbl_fsid,
204-
sizeof(struct nfs_fsid)))
205-
continue;
186+
int stat;
206187

207-
list_for_each_entry(lo, &server->layouts, plh_layouts) {
208-
ino = igrab(lo->plh_inode);
209-
if (!ino)
210-
continue;
211-
spin_lock(&ino->i_lock);
212-
/* Is this layout in the process of being freed? */
213-
if (NFS_I(ino)->layout != lo) {
214-
spin_unlock(&ino->i_lock);
215-
iput(ino);
216-
continue;
217-
}
218-
pnfs_get_layout_hdr(lo);
219-
spin_unlock(&ino->i_lock);
220-
list_add(&lo->plh_bulk_recall, &recall_list);
221-
}
222-
}
223-
rcu_read_unlock();
224-
spin_unlock(&clp->cl_lock);
225-
226-
list_for_each_entry_safe(lo, tmp,
227-
&recall_list, plh_bulk_recall) {
228-
ino = lo->plh_inode;
229-
spin_lock(&ino->i_lock);
230-
set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
231-
if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &range))
232-
rv = NFS4ERR_DELAY;
233-
list_del_init(&lo->plh_bulk_recall);
234-
spin_unlock(&ino->i_lock);
235-
pnfs_free_lseg_list(&free_me_list);
236-
pnfs_put_layout_hdr(lo);
237-
iput(ino);
238-
}
239-
return rv;
188+
if (args->cbl_recall_type == RETURN_FSID)
189+
stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true);
190+
else
191+
stat = pnfs_destroy_layouts_byclid(clp, true);
192+
if (stat != 0)
193+
return NFS4ERR_DELAY;
194+
return NFS4ERR_NOMATCHING_LAYOUT;
240195
}
241196

242197
static u32 do_callback_layoutrecall(struct nfs_client *clp,

fs/nfs/pnfs.c

Lines changed: 130 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -505,37 +505,147 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
505505
}
506506
EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
507507

508-
/*
509-
* Called by the state manger to remove all layouts established under an
510-
* expired lease.
511-
*/
512-
void
513-
pnfs_destroy_all_layouts(struct nfs_client *clp)
508+
static bool
509+
pnfs_layout_add_bulk_destroy_list(struct inode *inode,
510+
struct list_head *layout_list)
514511
{
515-
struct nfs_server *server;
516512
struct pnfs_layout_hdr *lo;
517-
LIST_HEAD(tmp_list);
513+
bool ret = false;
518514

519-
nfs4_deviceid_mark_client_invalid(clp);
520-
nfs4_deviceid_purge_client(clp);
515+
spin_lock(&inode->i_lock);
516+
lo = NFS_I(inode)->layout;
517+
if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
518+
pnfs_get_layout_hdr(lo);
519+
list_add(&lo->plh_bulk_destroy, layout_list);
520+
ret = true;
521+
}
522+
spin_unlock(&inode->i_lock);
523+
return ret;
524+
}
525+
526+
/* Caller must hold rcu_read_lock and clp->cl_lock */
527+
static int
528+
pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
529+
struct nfs_server *server,
530+
struct list_head *layout_list)
531+
{
532+
struct pnfs_layout_hdr *lo, *next;
533+
struct inode *inode;
534+
535+
list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
536+
inode = igrab(lo->plh_inode);
537+
if (inode == NULL)
538+
continue;
539+
list_del_init(&lo->plh_layouts);
540+
if (pnfs_layout_add_bulk_destroy_list(inode, layout_list))
541+
continue;
542+
rcu_read_unlock();
543+
spin_unlock(&clp->cl_lock);
544+
iput(inode);
545+
spin_lock(&clp->cl_lock);
546+
rcu_read_lock();
547+
return -EAGAIN;
548+
}
549+
return 0;
550+
}
551+
552+
static int
553+
pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
554+
bool is_bulk_recall)
555+
{
556+
struct pnfs_layout_hdr *lo;
557+
struct inode *inode;
558+
struct pnfs_layout_range range = {
559+
.iomode = IOMODE_ANY,
560+
.offset = 0,
561+
.length = NFS4_MAX_UINT64,
562+
};
563+
LIST_HEAD(lseg_list);
564+
int ret = 0;
565+
566+
while (!list_empty(layout_list)) {
567+
lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
568+
plh_bulk_destroy);
569+
dprintk("%s freeing layout for inode %lu\n", __func__,
570+
lo->plh_inode->i_ino);
571+
inode = lo->plh_inode;
572+
spin_lock(&inode->i_lock);
573+
list_del_init(&lo->plh_bulk_destroy);
574+
lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
575+
if (is_bulk_recall)
576+
set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
577+
if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range))
578+
ret = -EAGAIN;
579+
spin_unlock(&inode->i_lock);
580+
pnfs_free_lseg_list(&lseg_list);
581+
pnfs_put_layout_hdr(lo);
582+
iput(inode);
583+
}
584+
return ret;
585+
}
586+
587+
int
588+
pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
589+
struct nfs_fsid *fsid,
590+
bool is_recall)
591+
{
592+
struct nfs_server *server;
593+
LIST_HEAD(layout_list);
521594

522595
spin_lock(&clp->cl_lock);
523596
rcu_read_lock();
597+
restart:
524598
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
525-
if (!list_empty(&server->layouts))
526-
list_splice_init(&server->layouts, &tmp_list);
599+
if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
600+
continue;
601+
if (pnfs_layout_bulk_destroy_byserver_locked(clp,
602+
server,
603+
&layout_list) != 0)
604+
goto restart;
527605
}
528606
rcu_read_unlock();
529607
spin_unlock(&clp->cl_lock);
530608

531-
while (!list_empty(&tmp_list)) {
532-
lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
533-
plh_layouts);
534-
dprintk("%s freeing layout for inode %lu\n", __func__,
535-
lo->plh_inode->i_ino);
536-
list_del_init(&lo->plh_layouts);
537-
pnfs_destroy_layout(NFS_I(lo->plh_inode));
609+
if (list_empty(&layout_list))
610+
return 0;
611+
return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
612+
}
613+
614+
int
615+
pnfs_destroy_layouts_byclid(struct nfs_client *clp,
616+
bool is_recall)
617+
{
618+
struct nfs_server *server;
619+
LIST_HEAD(layout_list);
620+
621+
spin_lock(&clp->cl_lock);
622+
rcu_read_lock();
623+
restart:
624+
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
625+
if (pnfs_layout_bulk_destroy_byserver_locked(clp,
626+
server,
627+
&layout_list) != 0)
628+
goto restart;
538629
}
630+
rcu_read_unlock();
631+
spin_unlock(&clp->cl_lock);
632+
633+
if (list_empty(&layout_list))
634+
return 0;
635+
return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
636+
}
637+
638+
/*
639+
* Called by the state manger to remove all layouts established under an
640+
* expired lease.
641+
*/
642+
void
643+
pnfs_destroy_all_layouts(struct nfs_client *clp)
644+
{
645+
nfs4_deviceid_mark_client_invalid(clp);
646+
nfs4_deviceid_purge_client(clp);
647+
648+
pnfs_destroy_layouts_byclid(clp, false);
539649
}
540650

541651
/*
@@ -888,7 +998,7 @@ alloc_init_layout_hdr(struct inode *ino,
888998
atomic_set(&lo->plh_refcount, 1);
889999
INIT_LIST_HEAD(&lo->plh_layouts);
8901000
INIT_LIST_HEAD(&lo->plh_segs);
891-
INIT_LIST_HEAD(&lo->plh_bulk_recall);
1001+
INIT_LIST_HEAD(&lo->plh_bulk_destroy);
8921002
lo->plh_inode = ino;
8931003
lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred);
8941004
return lo;

fs/nfs/pnfs.h

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -132,7 +132,7 @@ struct pnfs_layoutdriver_type {
132132
struct pnfs_layout_hdr {
133133
atomic_t plh_refcount;
134134
struct list_head plh_layouts; /* other client layouts */
135-
struct list_head plh_bulk_recall; /* clnt list of bulk recalls */
135+
struct list_head plh_bulk_destroy;
136136
struct list_head plh_segs; /* layout segments list */
137137
nfs4_stateid plh_stateid;
138138
atomic_t plh_outstanding; /* number of RPCs out */
@@ -196,6 +196,11 @@ struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
196196
void pnfs_free_lseg_list(struct list_head *tmp_list);
197197
void pnfs_destroy_layout(struct nfs_inode *);
198198
void pnfs_destroy_all_layouts(struct nfs_client *);
199+
int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
200+
struct nfs_fsid *fsid,
201+
bool is_recall);
202+
int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
203+
bool is_recall);
199204
void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
200205
void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
201206
const nfs4_stateid *new,

0 commit comments

Comments
 (0)