Skip to content

Commit d03fb5c

Browse files
ddmatsu authored and jgunthorpe committed
RDMA/rxe: Allow registering MRs for On-Demand Paging
Allow userspace to register an ODP-enabled MR, in which case the flag IB_ACCESS_ON_DEMAND is passed to rxe_reg_user_mr(). However, no RDMA operations are enabled for such MRs right now; they will be supported in the subsequent two patches. rxe_odp_do_pagefault_and_lock() is called to initialize an ODP-enabled MR. It syncs the process address space from the CPU page table to the driver page table (dma_list/pfn_list in umem_odp) when called with the RXE_PAGEFAULT_SNAPSHOT flag. Additionally, it can be used to trigger a page fault when the pages being accessed are not present or do not have proper read/write permissions, and possibly to prefetch pages in the future. Link: https://patch.msgid.link/r/[email protected] Signed-off-by: Daisuke Matsuda <[email protected]> Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent b601792 commit d03fb5c

File tree

6 files changed

+128
-6
lines changed

6 files changed

+128
-6
lines changed

drivers/infiniband/sw/rxe/rxe.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,13 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
9292
dev_put(ndev);
9393

9494
rxe->max_ucontext = RXE_MAX_UCONTEXT;
95+
96+
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
97+
rxe->attr.kernel_cap_flags |= IBK_ON_DEMAND_PAGING;
98+
99+
/* IB_ODP_SUPPORT_IMPLICIT is not supported right now. */
100+
rxe->attr.odp_caps.general_caps |= IB_ODP_SUPPORT;
101+
}
95102
}
96103

97104
/* initialize port attributes */

drivers/infiniband/sw/rxe/rxe_loc.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,4 +184,16 @@ static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
184184
/* rxe_odp.c */
185185
extern const struct mmu_interval_notifier_ops rxe_mn_ops;
186186

187+
/*
 * rxe_odp_mr_init_user() initializes an MR for On-Demand Paging.
 * With CONFIG_INFINIBAND_ON_DEMAND_PAGING set, only the prototype is
 * declared here; otherwise the inline stub in the #else branch is used.
 */
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
188+
int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
189+
u64 iova, int access_flags, struct rxe_mr *mr);
190+
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
191+
/* Stub for builds without ODP: ignores its arguments, returns -EOPNOTSUPP. */
static inline int
192+
rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
193+
int access_flags, struct rxe_mr *mr)
194+
{
195+
return -EOPNOTSUPP;
196+
}
197+
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
198+
187199
#endif /* RXE_LOC_H */

drivers/infiniband/sw/rxe/rxe_mr.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,10 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
323323
return err;
324324
}
325325

326-
return rxe_mr_copy_xarray(mr, iova, addr, length, dir);
326+
if (mr->umem->is_odp)
327+
return -EOPNOTSUPP;
328+
else
329+
return rxe_mr_copy_xarray(mr, iova, addr, length, dir);
327330
}
328331

329332
/* copy data in or out of a wqe, i.e. sg list
@@ -532,6 +535,10 @@ int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
532535
struct page *page;
533536
u64 *va;
534537

538+
/* ODP is not supported right now. WIP. */
539+
if (mr->umem->is_odp)
540+
return RESPST_ERR_UNSUPPORTED_OPCODE;
541+
535542
/* See IBA oA19-28 */
536543
if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
537544
rxe_dbg_mr(mr, "mr not in valid state\n");

drivers/infiniband/sw/rxe/rxe_odp.c

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,89 @@ static bool rxe_ib_invalidate_range(struct mmu_interval_notifier *mni,
3636
/*
 * MMU interval notifier operations for rxe. Only the .invalidate callback
 * is set, pointing at rxe_ib_invalidate_range().
 */
const struct mmu_interval_notifier_ops rxe_mn_ops = {
3737
.invalidate = rxe_ib_invalidate_range,
3838
};
39+
40+
/*
 * Flags for rxe_odp_do_pagefault_and_lock():
 * RXE_PAGEFAULT_RDONLY   - do not add ODP_WRITE_ALLOWED_BIT to the access
 *                          mask, even when the umem is writable.
 * RXE_PAGEFAULT_SNAPSHOT - call ib_umem_odp_map_dma_and_lock() with
 *                          fault == false instead of fault == true.
 */
#define RXE_PAGEFAULT_RDONLY BIT(1)
41+
#define RXE_PAGEFAULT_SNAPSHOT BIT(2)
42+
/*
 * rxe_odp_do_pagefault_and_lock() - wrapper around
 * ib_umem_odp_map_dma_and_lock() for an ODP MR.
 * @mr: MR whose umem is converted with to_ib_umem_odp()
 * @user_va: forwarded unchanged as the address argument
 * @bcnt: forwarded unchanged as the size argument
 * @flags: RXE_PAGEFAULT_* bits selecting the access mask and fault mode
 *
 * Returns the value of ib_umem_odp_map_dma_and_lock() unchanged. As the
 * comment in the body states, umem_mutex is held on success and the caller
 * is responsible for unlocking it.
 *
 * NOTE(review): @bcnt is an int, while rxe_odp_init_pages() passes
 * mr->umem->length here - confirm that large MRs cannot be truncated.
 */
static int rxe_odp_do_pagefault_and_lock(struct rxe_mr *mr, u64 user_va, int bcnt, u32 flags)
43+
{
44+
struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
45+
bool fault = !(flags & RXE_PAGEFAULT_SNAPSHOT);
46+
u64 access_mask;
47+
int np;
48+
49+
/* Read access is always requested. */
access_mask = ODP_READ_ALLOWED_BIT;
50+
/* Write access only for a writable umem and a non-read-only request. */
if (umem_odp->umem.writable && !(flags & RXE_PAGEFAULT_RDONLY))
51+
access_mask |= ODP_WRITE_ALLOWED_BIT;
52+
53+
/*
54+
* ib_umem_odp_map_dma_and_lock() locks umem_mutex on success.
55+
* Callers must release the lock later to let invalidation handler
56+
* do its work again.
57+
*/
58+
np = ib_umem_odp_map_dma_and_lock(umem_odp, user_va, bcnt,
59+
access_mask, fault);
60+
return np;
61+
}
62+
63+
/*
 * rxe_odp_init_pages() - run the initial snapshot for an ODP MR.
 * @mr: MR whose umem has already been set to an ODP umem
 *
 * Calls rxe_odp_do_pagefault_and_lock() with RXE_PAGEFAULT_SNAPSHOT, using
 * mr->umem->address and mr->umem->length as the range.
 *
 * Returns 0 when the helper returns a non-negative value, otherwise the
 * helper's negative return value.
 */
static int rxe_odp_init_pages(struct rxe_mr *mr)
64+
{
65+
struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
66+
int ret;
67+
68+
ret = rxe_odp_do_pagefault_and_lock(mr, mr->umem->address,
69+
mr->umem->length,
70+
RXE_PAGEFAULT_SNAPSHOT);
71+
72+
/* A non-negative result means the helper left umem_mutex held. */
if (ret >= 0)
73+
mutex_unlock(&umem_odp->umem_mutex);
74+
75+
/* The non-negative value itself is discarded; callers only see 0. */
return ret >= 0 ? 0 : ret;
76+
}
77+
78+
/*
 * rxe_odp_mr_init_user() - initialize @mr as an ODP-enabled user MR.
 * @rxe: device; supplies ib_dev and the advertised odp_caps
 * @start: start address passed to ib_umem_odp_get()
 * @length: length passed to ib_umem_odp_get() and stored in mr->ibmr.length
 * @iova: stored in mr->ibmr.iova
 * @access_flags: passed to rxe_mr_init() and ib_umem_odp_get(), and stored
 *                in mr->access
 * @mr: MR to fill in
 *
 * Return: 0 on success. -EOPNOTSUPP when
 * CONFIG_INFINIBAND_ON_DEMAND_PAGING is not enabled. -EINVAL for a
 * start == 0 / length == U64_MAX request when @iova is non-zero or
 * IB_ODP_SUPPORT_IMPLICIT is not set in the device caps. Otherwise the
 * error from ib_umem_odp_get() or rxe_odp_init_pages().
 */
int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
79+
u64 iova, int access_flags, struct rxe_mr *mr)
80+
{
81+
struct ib_umem_odp *umem_odp;
82+
int err;
83+
84+
if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
85+
return -EOPNOTSUPP;
86+
87+
rxe_mr_init(access_flags, mr);
88+
89+
/* start == 0 with length == U64_MAX is handled as an implicit ODP request. */
if (!start && length == U64_MAX) {
90+
if (iova != 0)
91+
return -EINVAL;
92+
if (!(rxe->attr.odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
93+
return -EINVAL;
94+
95+
/* Never reach here, for implicit ODP is not implemented. */
96+
}
97+
98+
umem_odp = ib_umem_odp_get(&rxe->ib_dev, start, length, access_flags,
99+
&rxe_mn_ops);
100+
if (IS_ERR(umem_odp)) {
101+
rxe_dbg_mr(mr, "Unable to create umem_odp err = %d\n",
102+
(int)PTR_ERR(umem_odp));
103+
return PTR_ERR(umem_odp);
104+
}
105+
106+
/*
 * Back-pointer from the umem to its MR.
 * NOTE(review): presumably read by the invalidation callback - confirm.
 */
umem_odp->private = mr;
107+
108+
/* mr->umem must be set before rxe_odp_init_pages(), which reads it. */
mr->umem = &umem_odp->umem;
109+
mr->access = access_flags;
110+
mr->ibmr.length = length;
111+
mr->ibmr.iova = iova;
112+
mr->page_offset = ib_umem_offset(&umem_odp->umem);
113+
114+
err = rxe_odp_init_pages(mr);
115+
if (err) {
116+
/*
 * NOTE(review): mr->umem was set above and still points at umem_odp
 * after this release - confirm the caller's error path does not
 * release mr->umem a second time.
 */
ib_umem_odp_release(umem_odp);
117+
return err;
118+
}
119+
120+
mr->state = RXE_MR_STATE_VALID;
121+
mr->ibmr.type = IB_MR_TYPE_USER;
122+
123+
/* err is 0 here: the non-zero case returned above. */
return err;
124+
}

drivers/infiniband/sw/rxe/rxe_resp.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -649,6 +649,10 @@ static enum resp_states process_flush(struct rxe_qp *qp,
649649
struct rxe_mr *mr = qp->resp.mr;
650650
struct resp_res *res = qp->resp.res;
651651

652+
/* ODP is not supported right now. WIP. */
653+
if (mr->umem->is_odp)
654+
return RESPST_ERR_UNSUPPORTED_OPCODE;
655+
652656
/* oA19-14, oA19-15 */
653657
if (res && res->replay)
654658
return RESPST_ACKNOWLEDGE;
@@ -702,10 +706,13 @@ static enum resp_states atomic_reply(struct rxe_qp *qp,
702706
if (!res->replay) {
703707
u64 iova = qp->resp.va + qp->resp.offset;
704708

705-
err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
706-
atmeth_comp(pkt),
707-
atmeth_swap_add(pkt),
708-
&res->atomic.orig_val);
709+
if (mr->umem->is_odp)
710+
err = RESPST_ERR_UNSUPPORTED_OPCODE;
711+
else
712+
err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
713+
atmeth_comp(pkt),
714+
atmeth_swap_add(pkt),
715+
&res->atomic.orig_val);
709716
if (err)
710717
return err;
711718

drivers/infiniband/sw/rxe/rxe_verbs.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1298,7 +1298,10 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start,
12981298
mr->ibmr.pd = ibpd;
12991299
mr->ibmr.device = ibpd->device;
13001300

1301-
err = rxe_mr_init_user(rxe, start, length, access, mr);
1301+
if (access & IB_ACCESS_ON_DEMAND)
1302+
err = rxe_odp_mr_init_user(rxe, start, length, iova, access, mr);
1303+
else
1304+
err = rxe_mr_init_user(rxe, start, length, access, mr);
13021305
if (err) {
13031306
rxe_dbg_mr(mr, "reg_user_mr failed, err = %d\n", err);
13041307
goto err_cleanup;

0 commit comments

Comments
 (0)