Skip to content

Commit 0208da9

Browse files
Mike Marciniszyn authored and dledford committed
IB/rdmavt: Handle dereg of inuse MRs properly
A destroy of an MR prior to destroying the QP can cause the following diagnostic if the QP is referencing the MR being de-registered: hfi1 0000:05:00.0: hfi1_0: rvt_dereg_mr timeout mr ffff880856210800 pd ffff880859b20b00 The solution is that when a non-zero refcount is encountered while the MR is being destroyed, the QPs need to be iterated, looking for QPs in the same PD as the MR. If rvt_qp_mr_clean() detects that any such QP references the rkey/lkey, the QP needs to be put into an error state via a call to rvt_error_qp(), which will trigger the cleanup of any stuck references. This solution is as specified in IBTA 1.3 Volume 1 11.2.10.5. [This is reproduced with the 0.4.9 version of qperf and the rc_bw test] Reviewed-by: Dennis Dalessandro <[email protected]> Signed-off-by: Mike Marciniszyn <[email protected]> Signed-off-by: Dennis Dalessandro <[email protected]> Signed-off-by: Doug Ledford <[email protected]>
1 parent 557fafe commit 0208da9

File tree

4 files changed

+216
-21
lines changed

4 files changed

+216
-21
lines changed

drivers/infiniband/sw/rdmavt/mr.c

Lines changed: 104 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,105 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
440440
return ret;
441441
}
442442

443+
/**
444+
* rvt_dereg_clean_qp_cb - callback from iterator
445+
* @qp - the qp
446+
* @v - the mregion (as u64)
447+
*
448+
* This routine fields the callback for all QPs and
449+
* for QPs in the same PD as the MR calls
450+
* rvt_qp_mr_clean() to potentially clean up references.
451+
*/
452+
static void rvt_dereg_clean_qp_cb(struct rvt_qp *qp, u64 v)
453+
{
454+
struct rvt_mregion *mr = (struct rvt_mregion *)v;
455+
456+
/* skip PDs that are not ours */
457+
if (mr->pd != qp->ibqp.pd)
458+
return;
459+
rvt_qp_mr_clean(qp, mr->lkey);
460+
}
461+
462+
/**
463+
* rvt_dereg_clean_qps - find QPs for reference cleanup
464+
* @mr - the MR that is being deregistered
465+
*
466+
* This routine iterates the device's QPs looking for references
467+
* to the lkey noted in mr.
468+
*/
469+
static void rvt_dereg_clean_qps(struct rvt_mregion *mr)
470+
{
471+
struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device);
472+
473+
rvt_qp_iter(rdi, (u64)mr, rvt_dereg_clean_qp_cb);
474+
}
475+
476+
/**
477+
* rvt_check_refs - check references
478+
* @mr - the mregion
479+
* @t - the caller identification
480+
*
481+
* This routine checks whether an MR still holds references
482+
* while it is being de-registered.
483+
*
484+
* If the count is non-zero, the code calls a clean routine and then
485+
* waits, up to a timeout, for the count to reach zero.
486+
*/
487+
static int rvt_check_refs(struct rvt_mregion *mr, const char *t)
488+
{
489+
unsigned long timeout;
490+
struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device);
491+
492+
if (percpu_ref_is_zero(&mr->refcount))
493+
return 0;
494+
/* avoid dma mr */
495+
if (mr->lkey)
496+
rvt_dereg_clean_qps(mr);
497+
timeout = wait_for_completion_timeout(&mr->comp, 5 * HZ);
498+
if (!timeout) {
499+
rvt_pr_err(rdi,
500+
"%s timeout mr %p pd %p lkey %x refcount %ld\n",
501+
t, mr, mr->pd, mr->lkey,
502+
atomic_long_read(&mr->refcount.count));
503+
rvt_get_mr(mr);
504+
return -EBUSY;
505+
}
506+
return 0;
507+
}
508+
509+
/**
510+
* rvt_mr_has_lkey - test whether the MR has the given lkey
511+
* @mr - the mregion
512+
* @lkey - the lkey
513+
*/
514+
bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey)
515+
{
516+
return mr && lkey == mr->lkey;
517+
}
518+
519+
/**
520+
* rvt_ss_has_lkey - test whether an sge state references the lkey
521+
* @ss - the sge state
522+
* @lkey - the lkey
523+
*
524+
* This code tests for an MR in the indicated
525+
* sge state.
526+
*/
527+
bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey)
528+
{
529+
int i;
530+
bool rval = false;
531+
532+
if (!ss->num_sge)
533+
return rval;
534+
/* first one */
535+
rval = rvt_mr_has_lkey(ss->sge.mr, lkey);
536+
/* any others */
537+
for (i = 0; !rval && i < ss->num_sge - 1; i++)
538+
rval = rvt_mr_has_lkey(ss->sg_list[i].mr, lkey);
539+
return rval;
540+
}
541+
443542
/**
444543
* rvt_dereg_mr - unregister and free a memory region
445544
* @ibmr: the memory region to free
@@ -453,22 +552,14 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
453552
int rvt_dereg_mr(struct ib_mr *ibmr)
454553
{
455554
struct rvt_mr *mr = to_imr(ibmr);
456-
struct rvt_dev_info *rdi = ib_to_rvt(ibmr->pd->device);
457-
int ret = 0;
458-
unsigned long timeout;
555+
int ret;
459556

460557
rvt_free_lkey(&mr->mr);
461558

462559
rvt_put_mr(&mr->mr); /* will set completion if last */
463-
timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
464-
if (!timeout) {
465-
rvt_pr_err(rdi,
466-
"rvt_dereg_mr timeout mr %p pd %p\n",
467-
mr, mr->mr.pd);
468-
rvt_get_mr(&mr->mr);
469-
ret = -EBUSY;
560+
ret = rvt_check_refs(&mr->mr, __func__);
561+
if (ret)
470562
goto out;
471-
}
472563
rvt_deinit_mregion(&mr->mr);
473564
if (mr->umem)
474565
ib_umem_release(mr->umem);
@@ -761,16 +852,12 @@ int rvt_dealloc_fmr(struct ib_fmr *ibfmr)
761852
{
762853
struct rvt_fmr *fmr = to_ifmr(ibfmr);
763854
int ret = 0;
764-
unsigned long timeout;
765855

766856
rvt_free_lkey(&fmr->mr);
767857
rvt_put_mr(&fmr->mr); /* will set completion if last */
768-
timeout = wait_for_completion_timeout(&fmr->mr.comp, 5 * HZ);
769-
if (!timeout) {
770-
rvt_get_mr(&fmr->mr);
771-
ret = -EBUSY;
858+
ret = rvt_check_refs(&fmr->mr, __func__);
859+
if (ret)
772860
goto out;
773-
}
774861
rvt_deinit_mregion(&fmr->mr);
775862
kfree(fmr);
776863
out:

drivers/infiniband/sw/rdmavt/qp.c

Lines changed: 108 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -458,10 +458,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
458458
}
459459
}
460460

461-
if (qp->ibqp.qp_type != IB_QPT_RC)
462-
return;
463-
464-
for (n = 0; n < rvt_max_atomic(rdi); n++) {
461+
for (n = 0; qp->s_ack_queue && n < rvt_max_atomic(rdi); n++) {
465462
struct rvt_ack_entry *e = &qp->s_ack_queue[n];
466463

467464
if (e->rdma_sge.mr) {
@@ -471,6 +468,113 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
471468
}
472469
}
473470

471+
/**
472+
* rvt_swqe_has_lkey - return true if lkey is used by swqe
473+
* @wqe - the send wqe
474+
* @lkey - the lkey
475+
*
476+
* Test the swqe for using lkey
477+
*/
478+
static bool rvt_swqe_has_lkey(struct rvt_swqe *wqe, u32 lkey)
479+
{
480+
int i;
481+
482+
for (i = 0; i < wqe->wr.num_sge; i++) {
483+
struct rvt_sge *sge = &wqe->sg_list[i];
484+
485+
if (rvt_mr_has_lkey(sge->mr, lkey))
486+
return true;
487+
}
488+
return false;
489+
}
490+
491+
/**
492+
* rvt_qp_sends_has_lkey - return true if qp sends use lkey
493+
* @qp - the rvt_qp
494+
* @lkey - the lkey
495+
*/
496+
static bool rvt_qp_sends_has_lkey(struct rvt_qp *qp, u32 lkey)
497+
{
498+
u32 s_last = qp->s_last;
499+
500+
while (s_last != qp->s_head) {
501+
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, s_last);
502+
503+
if (rvt_swqe_has_lkey(wqe, lkey))
504+
return true;
505+
506+
if (++s_last >= qp->s_size)
507+
s_last = 0;
508+
}
509+
if (qp->s_rdma_mr)
510+
if (rvt_mr_has_lkey(qp->s_rdma_mr, lkey))
511+
return true;
512+
return false;
513+
}
514+
515+
/**
516+
* rvt_qp_acks_has_lkey - return true if acks have lkey
517+
* @qp - the qp
518+
* @lkey - the lkey
519+
*/
520+
static bool rvt_qp_acks_has_lkey(struct rvt_qp *qp, u32 lkey)
521+
{
522+
int i;
523+
struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
524+
525+
for (i = 0; qp->s_ack_queue && i < rvt_max_atomic(rdi); i++) {
526+
struct rvt_ack_entry *e = &qp->s_ack_queue[i];
527+
528+
if (rvt_mr_has_lkey(e->rdma_sge.mr, lkey))
529+
return true;
530+
}
531+
return false;
532+
}
533+
534+
/**
535+
* rvt_qp_mr_clean - clean up remote ops for lkey
536+
* @qp - the qp
537+
* @lkey - the lkey that is being de-registered
538+
*
539+
* This routine checks if the lkey is being used by
540+
* the qp.
541+
*
542+
* If so, the qp is put into an error state to eliminate
543+
* any references from the qp.
544+
*/
545+
void rvt_qp_mr_clean(struct rvt_qp *qp, u32 lkey)
546+
{
547+
bool lastwqe = false;
548+
549+
if (qp->ibqp.qp_type == IB_QPT_SMI ||
550+
qp->ibqp.qp_type == IB_QPT_GSI)
551+
/* avoid special QPs */
552+
return;
553+
spin_lock_irq(&qp->r_lock);
554+
spin_lock(&qp->s_hlock);
555+
spin_lock(&qp->s_lock);
556+
557+
if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
558+
goto check_lwqe;
559+
560+
if (rvt_ss_has_lkey(&qp->r_sge, lkey) ||
561+
rvt_qp_sends_has_lkey(qp, lkey) ||
562+
rvt_qp_acks_has_lkey(qp, lkey))
563+
lastwqe = rvt_error_qp(qp, IB_WC_LOC_PROT_ERR);
564+
check_lwqe:
565+
spin_unlock(&qp->s_lock);
566+
spin_unlock(&qp->s_hlock);
567+
spin_unlock_irq(&qp->r_lock);
568+
if (lastwqe) {
569+
struct ib_event ev;
570+
571+
ev.device = qp->ibqp.device;
572+
ev.element.qp = &qp->ibqp;
573+
ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
574+
qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
575+
}
576+
}
577+
474578
/**
475579
* rvt_remove_qp - remove qp from table
476580
* @rdi: rvt dev struct

include/rdma/rdmavt_mr.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,4 +191,7 @@ static inline void rvt_skip_sge(struct rvt_sge_state *ss, u32 length,
191191
}
192192
}
193193

194+
bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey);
195+
bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey);
196+
194197
#endif /* DEF_RDMAVT_INCMRH */

include/rdma/rdmavt_qp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -702,4 +702,5 @@ int rvt_qp_iter_next(struct rvt_qp_iter *iter);
702702
void rvt_qp_iter(struct rvt_dev_info *rdi,
703703
u64 v,
704704
void (*cb)(struct rvt_qp *qp, u64 v));
705+
void rvt_qp_mr_clean(struct rvt_qp *qp, u32 lkey);
705706
#endif /* DEF_RDMAVT_INCQP_H */

0 commit comments

Comments
 (0)