Skip to content

Commit 3453d57

Browse files
author
Trond Myklebust
committed
NFSv4.1: Avoid false retries when RPC calls are interrupted
A 'false retry' in NFSv4.1 occurs when the client attempts to transmit a new RPC call using a slot+sequence number combination that references an already cached one. Currently, the Linux NFS client will do this if a user process interrupts an RPC call that is in progress. The problem with doing so is that we defeat the main mechanism used by the server to differentiate between a new call and a replayed one. Even if the server is able to perfectly cache the arguments of the old call, it cannot know if the client intended to replay or send a new call. The obvious fix is to bump the sequence number pre-emptively if an RPC call is interrupted, but in order to deal with the corner cases where the interrupted call is not actually received and processed by the server, we need to interpret the error NFS4ERR_SEQ_MISORDERED as a sign that we need to either wait or locate a correct sequence number that lies between the value we sent, and the last value that was acked by a SEQUENCE call on that slot. Signed-off-by: Trond Myklebust <[email protected]> Tested-by: Jason Tibbitts <[email protected]>
1 parent 6f903b1 commit 3453d57

File tree

3 files changed

+55
-60
lines changed

3 files changed

+55
-60
lines changed

fs/nfs/nfs4proc.c

Lines changed: 48 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -730,13 +730,25 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
730730
res->sr_slot = NULL;
731731
}
732732

733+
static void nfs4_slot_sequence_record_sent(struct nfs4_slot *slot,
734+
u32 seqnr)
735+
{
736+
if ((s32)(seqnr - slot->seq_nr_highest_sent) > 0)
737+
slot->seq_nr_highest_sent = seqnr;
738+
}
739+
static void nfs4_slot_sequence_acked(struct nfs4_slot *slot,
740+
u32 seqnr)
741+
{
742+
slot->seq_nr_highest_sent = seqnr;
743+
slot->seq_nr_last_acked = seqnr;
744+
}
745+
733746
static int nfs41_sequence_process(struct rpc_task *task,
734747
struct nfs4_sequence_res *res)
735748
{
736749
struct nfs4_session *session;
737750
struct nfs4_slot *slot = res->sr_slot;
738751
struct nfs_client *clp;
739-
bool interrupted = false;
740752
int ret = 1;
741753

742754
if (slot == NULL)
@@ -747,16 +759,12 @@ static int nfs41_sequence_process(struct rpc_task *task,
747759

748760
session = slot->table->session;
749761

750-
if (slot->interrupted) {
751-
if (res->sr_status != -NFS4ERR_DELAY)
752-
slot->interrupted = 0;
753-
interrupted = true;
754-
}
755-
756762
trace_nfs4_sequence_done(session, res);
757763
/* Check the SEQUENCE operation status */
758764
switch (res->sr_status) {
759765
case 0:
766+
/* Mark this sequence number as having been acked */
767+
nfs4_slot_sequence_acked(slot, slot->seq_nr);
760768
/* Update the slot's sequence and clientid lease timer */
761769
slot->seq_done = 1;
762770
clp = session->clp;
@@ -771,9 +779,9 @@ static int nfs41_sequence_process(struct rpc_task *task,
771779
* sr_status remains 1 if an RPC level error occurred.
772780
* The server may or may not have processed the sequence
773781
* operation..
774-
* Mark the slot as having hosted an interrupted RPC call.
775782
*/
776-
slot->interrupted = 1;
783+
nfs4_slot_sequence_record_sent(slot, slot->seq_nr);
784+
slot->seq_done = 1;
777785
goto out;
778786
case -NFS4ERR_DELAY:
779787
/* The server detected a resend of the RPC call and
@@ -784,13 +792,15 @@ static int nfs41_sequence_process(struct rpc_task *task,
784792
__func__,
785793
slot->slot_nr,
786794
slot->seq_nr);
795+
nfs4_slot_sequence_acked(slot, slot->seq_nr);
787796
goto out_retry;
788797
case -NFS4ERR_RETRY_UNCACHED_REP:
789798
case -NFS4ERR_SEQ_FALSE_RETRY:
790799
/*
791800
* The server thinks we tried to replay a request.
792801
* Retry the call after bumping the sequence ID.
793802
*/
803+
nfs4_slot_sequence_acked(slot, slot->seq_nr);
794804
goto retry_new_seq;
795805
case -NFS4ERR_BADSLOT:
796806
/*
@@ -801,21 +811,28 @@ static int nfs41_sequence_process(struct rpc_task *task,
801811
goto session_recover;
802812
goto retry_nowait;
803813
case -NFS4ERR_SEQ_MISORDERED:
814+
nfs4_slot_sequence_record_sent(slot, slot->seq_nr);
804815
/*
805-
* Was the last operation on this sequence interrupted?
806-
* If so, retry after bumping the sequence number.
816+
* Were one or more calls using this slot interrupted?
817+
* If the server never received the request, then our
818+
* transmitted slot sequence number may be too high.
807819
*/
808-
if (interrupted)
809-
goto retry_new_seq;
810-
/*
811-
* Could this slot have been previously retired?
812-
* If so, then the server may be expecting seq_nr = 1!
813-
*/
814-
if (slot->seq_nr != 1) {
815-
slot->seq_nr = 1;
820+
if ((s32)(slot->seq_nr - slot->seq_nr_last_acked) > 1) {
821+
slot->seq_nr--;
816822
goto retry_nowait;
817823
}
818-
goto session_recover;
824+
/*
825+
* RFC5661:
826+
* A retry might be sent while the original request is
827+
* still in progress on the replier. The replier SHOULD
828+
* deal with the issue by returning NFS4ERR_DELAY as the
829+
* reply to SEQUENCE or CB_SEQUENCE operation, but
830+
* implementations MAY return NFS4ERR_SEQ_MISORDERED.
831+
*
832+
* Restart the search after a delay.
833+
*/
834+
slot->seq_nr = slot->seq_nr_highest_sent;
835+
goto out_retry;
819836
default:
820837
/* Just update the slot sequence no. */
821838
slot->seq_done = 1;
@@ -906,17 +923,6 @@ static const struct rpc_call_ops nfs41_call_sync_ops = {
906923
.rpc_call_done = nfs41_call_sync_done,
907924
};
908925

909-
static void
910-
nfs4_sequence_process_interrupted(struct nfs_client *client,
911-
struct nfs4_slot *slot, const struct cred *cred)
912-
{
913-
struct rpc_task *task;
914-
915-
task = _nfs41_proc_sequence(client, cred, slot, true);
916-
if (!IS_ERR(task))
917-
rpc_put_task_async(task);
918-
}
919-
920926
#else /* !CONFIG_NFS_V4_1 */
921927

922928
static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res)
@@ -937,14 +943,6 @@ int nfs4_sequence_done(struct rpc_task *task,
937943
}
938944
EXPORT_SYMBOL_GPL(nfs4_sequence_done);
939945

940-
static void
941-
nfs4_sequence_process_interrupted(struct nfs_client *client,
942-
struct nfs4_slot *slot, const struct cred *cred)
943-
{
944-
WARN_ON_ONCE(1);
945-
slot->interrupted = 0;
946-
}
947-
948946
#endif /* !CONFIG_NFS_V4_1 */
949947

950948
static
@@ -982,26 +980,19 @@ int nfs4_setup_sequence(struct nfs_client *client,
982980
task->tk_timeout = 0;
983981
}
984982

985-
for (;;) {
986-
spin_lock(&tbl->slot_tbl_lock);
987-
/* The state manager will wait until the slot table is empty */
988-
if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
989-
goto out_sleep;
990-
991-
slot = nfs4_alloc_slot(tbl);
992-
if (IS_ERR(slot)) {
993-
/* Try again in 1/4 second */
994-
if (slot == ERR_PTR(-ENOMEM))
995-
task->tk_timeout = HZ >> 2;
996-
goto out_sleep;
997-
}
998-
spin_unlock(&tbl->slot_tbl_lock);
983+
spin_lock(&tbl->slot_tbl_lock);
984+
/* The state manager will wait until the slot table is empty */
985+
if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
986+
goto out_sleep;
999987

1000-
if (likely(!slot->interrupted))
1001-
break;
1002-
nfs4_sequence_process_interrupted(client,
1003-
slot, task->tk_msg.rpc_cred);
988+
slot = nfs4_alloc_slot(tbl);
989+
if (IS_ERR(slot)) {
990+
/* Try again in 1/4 second */
991+
if (slot == ERR_PTR(-ENOMEM))
992+
task->tk_timeout = HZ >> 2;
993+
goto out_sleep;
1004994
}
995+
spin_unlock(&tbl->slot_tbl_lock);
1005996

1006997
nfs4_sequence_attach_slot(args, res, slot);
1007998

fs/nfs/nfs4session.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ static struct nfs4_slot *nfs4_new_slot(struct nfs4_slot_table *tbl,
110110
slot->table = tbl;
111111
slot->slot_nr = slotid;
112112
slot->seq_nr = seq_init;
113+
slot->seq_nr_highest_sent = seq_init;
114+
slot->seq_nr_last_acked = seq_init - 1;
113115
}
114116
return slot;
115117
}
@@ -276,7 +278,8 @@ static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl,
276278
p = &tbl->slots;
277279
while (*p) {
278280
(*p)->seq_nr = ivalue;
279-
(*p)->interrupted = 0;
281+
(*p)->seq_nr_highest_sent = ivalue;
282+
(*p)->seq_nr_last_acked = ivalue - 1;
280283
p = &(*p)->next;
281284
}
282285
tbl->highest_used_slotid = NFS4_NO_SLOT;

fs/nfs/nfs4session.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@ struct nfs4_slot {
2323
unsigned long generation;
2424
u32 slot_nr;
2525
u32 seq_nr;
26-
unsigned int interrupted : 1,
27-
privileged : 1,
26+
u32 seq_nr_last_acked;
27+
u32 seq_nr_highest_sent;
28+
unsigned int privileged : 1,
2829
seq_done : 1;
2930
};
3031

0 commit comments

Comments
 (0)