
Commit 647bf13

Bob Pearson authored and jgunthorpe committed
RDMA/rxe: Create duplicate mapping tables for FMRs
For fast memory regions create duplicate mapping tables so ib_map_mr_sg()
can build a new mapping table which is then swapped into place
synchronously with the execution of an IB_WR_REG_MR work request.
Currently the rxe driver uses the same table for receiving RDMA
operations and for building new tables in preparation for reusing the MR.
This exposes users to potentially incorrect results.

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Bob Pearson <[email protected]>
Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent 0013453 commit 647bf13
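The patch is a double-buffering scheme: in-flight RDMA operations keep reading the current table while ib_map_mr_sg() populates a shadow table, and the two are swapped only when the IB_WR_REG_MR work request executes. A minimal sketch of the pattern, with illustrative names rather than the driver's own types:

#include <stddef.h>
#include <stdint.h>

/* Illustrative double-buffer; not the driver's actual types. */
struct map_table {
	uint64_t *page_addr;
	size_t nbuf;
};

struct region {
	struct map_table *cur;	/* serves in-flight RDMA operations */
	struct map_table *next;	/* staging table filled during remapping */
};

/* Building a new mapping touches only the staging table, so readers of
 * "cur" never observe a half-built table. */
static void stage_page(struct region *r, uint64_t addr)
{
	r->next->page_addr[r->next->nbuf++] = addr;
}

/* The swap happens at one well-defined point (for rxe, execution of the
 * IB_WR_REG_MR WQE), where the driver is already serialized against
 * incoming operations. */
static void commit(struct region *r)
{
	struct map_table *tmp = r->cur;

	r->cur = r->next;
	r->next = tmp;
}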

File tree

5 files changed: +161 -102 lines

drivers/infiniband/sw/rxe/rxe_loc.h

Lines changed: 1 addition & 0 deletions

@@ -87,6 +87,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length);
 int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
 int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey);
 int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
+int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr);
 int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
 void rxe_mr_cleanup(struct rxe_pool_entry *arg);
 

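rxe_mr_set_page() is exported here so rxe_verbs.c can hand it to the IB core's page-list walker; the rxe_verbs.c and rxe_verbs.h hunks of this commit are not reproduced on this page. A sketch of the likely wiring, assuming the standard ib_sg_to_pages() helper (the body below is an approximation, not the commit's actual rxe_verbs.c change):

/* Approximate map_mr_sg wiring; illustrative, not the actual hunk. */
static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
			 int sg_nents, unsigned int *sg_offset)
{
	struct rxe_mr *mr = to_rmr(ibmr);
	struct rxe_map_set *set = mr->next_map_set;
	int n;

	set->nbuf = 0;	/* rebuild into the staging set only */

	n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_mr_set_page);

	set->page_shift = ilog2(ibmr->page_size);
	set->page_mask = ibmr->page_size - 1;

	return n;
}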
drivers/infiniband/sw/rxe/rxe_mr.c

Lines changed: 132 additions & 64 deletions
@@ -24,16 +24,16 @@ u8 rxe_get_next_key(u32 last_key)
 
 int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
 {
-
+	struct rxe_map_set *set = mr->cur_map_set;
 
 	switch (mr->type) {
 	case IB_MR_TYPE_DMA:
 		return 0;
 
 	case IB_MR_TYPE_USER:
 	case IB_MR_TYPE_MEM_REG:
-		if (iova < mr->iova || length > mr->length ||
-		    iova > mr->iova + mr->length - length)
+		if (iova < set->iova || length > set->length ||
+		    iova > set->iova + set->length - length)
 			return -EFAULT;
 		return 0;
 
@@ -65,41 +65,89 @@ static void rxe_mr_init(int access, struct rxe_mr *mr)
 	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
 }
 
-static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
+static void rxe_mr_free_map_set(int num_map, struct rxe_map_set *set)
 {
 	int i;
-	int num_map;
-	struct rxe_map **map = mr->map;
 
-	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;
+	for (i = 0; i < num_map; i++)
+		kfree(set->map[i]);
 
-	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
-	if (!mr->map)
-		goto err1;
+	kfree(set->map);
+	kfree(set);
+}
+
+static int rxe_mr_alloc_map_set(int num_map, struct rxe_map_set **setp)
+{
+	int i;
+	struct rxe_map_set *set;
+
+	set = kmalloc(sizeof(*set), GFP_KERNEL);
+	if (!set)
+		goto err_out;
+
+	set->map = kmalloc_array(num_map, sizeof(struct rxe_map *), GFP_KERNEL);
+	if (!set->map)
+		goto err_free_set;
 
 	for (i = 0; i < num_map; i++) {
-		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
-		if (!mr->map[i])
-			goto err2;
+		set->map[i] = kmalloc(sizeof(struct rxe_map), GFP_KERNEL);
+		if (!set->map[i])
+			goto err_free_map;
 	}
 
+	*setp = set;
+
+	return 0;
+
+err_free_map:
+	for (i--; i >= 0; i--)
+		kfree(set->map[i]);
+
+	kfree(set->map);
+err_free_set:
+	kfree(set);
+err_out:
+	return -ENOMEM;
+}
+
+/**
+ * rxe_mr_alloc() - Allocate memory map array(s) for MR
+ * @mr: Memory region
+ * @num_buf: Number of buffer descriptors to support
+ * @both: If non zero allocate both mr->map and mr->next_map
+ *	  else just allocate mr->map. Used for fast MRs
+ *
+ * Return: 0 on success else an error
+ */
+static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf, int both)
+{
+	int ret;
+	int num_map;
+
 	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));
+	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;
 
 	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
 	mr->map_mask = RXE_BUF_PER_MAP - 1;
-
 	mr->num_buf = num_buf;
-	mr->num_map = num_map;
 	mr->max_buf = num_map * RXE_BUF_PER_MAP;
+	mr->num_map = num_map;
 
-	return 0;
+	ret = rxe_mr_alloc_map_set(num_map, &mr->cur_map_set);
+	if (ret)
+		goto err_out;
 
-err2:
-	for (i--; i >= 0; i--)
-		kfree(mr->map[i]);
+	if (both) {
+		ret = rxe_mr_alloc_map_set(num_map, &mr->next_map_set);
+		if (ret) {
+			rxe_mr_free_map_set(mr->num_map, mr->cur_map_set);
+			goto err_out;
+		}
+	}
 
-	kfree(mr->map);
-err1:
+	return 0;
+
+err_out:
 	return -ENOMEM;
 }
 
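The code above allocates and frees struct rxe_map_set, whose definition lands in rxe_verbs.h (not shown on this page). A reconstruction inferred from the fields used throughout this diff; treat the exact types and ordering as assumptions:

/* Inferred from usage in this diff; the authoritative definition is in
 * the commit's rxe_verbs.h hunk, which this page omits. */
struct rxe_map_set {
	struct rxe_map	**map;		/* num_map chunks of phys_buf entries */
	u64		va;		/* user VA of the region (user MRs) */
	u64		iova;		/* IO virtual address the MR answers to */
	size_t		length;		/* total bytes mapped */
	u32		offset;		/* byte offset into the first page */
	u32		nbuf;		/* entries filled so far (fast MRs) */
	int		page_shift;	/* log2(page size); 0 for mixed sizes */
	int		page_mask;	/* page size - 1 */
};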
@@ -116,14 +164,14 @@ void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
 int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 		     int access, struct rxe_mr *mr)
 {
+	struct rxe_map_set *set;
 	struct rxe_map **map;
 	struct rxe_phys_buf *buf = NULL;
 	struct ib_umem *umem;
 	struct sg_page_iter sg_iter;
 	int num_buf;
 	void *vaddr;
 	int err;
-	int i;
 
 	umem = ib_umem_get(pd->ibpd.device, start, length, access);
 	if (IS_ERR(umem)) {
@@ -137,18 +185,20 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 
 	rxe_mr_init(access, mr);
 
-	err = rxe_mr_alloc(mr, num_buf);
+	err = rxe_mr_alloc(mr, num_buf, 0);
 	if (err) {
 		pr_warn("%s: Unable to allocate memory for map\n",
 			__func__);
 		goto err_release_umem;
 	}
 
-	mr->page_shift = PAGE_SHIFT;
-	mr->page_mask = PAGE_SIZE - 1;
+	set = mr->cur_map_set;
+	set->page_shift = PAGE_SHIFT;
+	set->page_mask = PAGE_SIZE - 1;
+
+	num_buf = 0;
+	map = set->map;
 
-	num_buf = 0;
-	map = mr->map;
 	if (length > 0) {
 		buf = map[0]->buf;
 
@@ -171,26 +221,24 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 			buf->size = PAGE_SIZE;
 			num_buf++;
 			buf++;
-
 		}
 	}
 
 	mr->ibmr.pd = &pd->ibpd;
 	mr->umem = umem;
 	mr->access = access;
-	mr->length = length;
-	mr->iova = iova;
-	mr->va = start;
-	mr->offset = ib_umem_offset(umem);
 	mr->state = RXE_MR_STATE_VALID;
 	mr->type = IB_MR_TYPE_USER;
 
+	set->length = length;
+	set->iova = iova;
+	set->va = start;
+	set->offset = ib_umem_offset(umem);
+
 	return 0;
 
 err_cleanup_map:
-	for (i = 0; i < mr->num_map; i++)
-		kfree(mr->map[i]);
-	kfree(mr->map);
+	rxe_mr_free_map_set(mr->num_map, mr->cur_map_set);
 err_release_umem:
 	ib_umem_release(umem);
 err_out:
@@ -204,7 +252,7 @@ int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
 	/* always allow remote access for FMRs */
 	rxe_mr_init(IB_ACCESS_REMOTE, mr);
 
-	err = rxe_mr_alloc(mr, max_pages);
+	err = rxe_mr_alloc(mr, max_pages, 1);
 	if (err)
 		goto err1;
 
@@ -222,21 +270,24 @@ int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
 static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
 			size_t *offset_out)
 {
-	size_t offset = iova - mr->iova + mr->offset;
+	struct rxe_map_set *set = mr->cur_map_set;
+	size_t offset = iova - set->iova + set->offset;
 	int map_index;
 	int buf_index;
 	u64 length;
+	struct rxe_map *map;
 
-	if (likely(mr->page_shift)) {
-		*offset_out = offset & mr->page_mask;
-		offset >>= mr->page_shift;
+	if (likely(set->page_shift)) {
+		*offset_out = offset & set->page_mask;
+		offset >>= set->page_shift;
 		*n_out = offset & mr->map_mask;
 		*m_out = offset >> mr->map_shift;
 	} else {
 		map_index = 0;
 		buf_index = 0;
 
-		length = mr->map[map_index]->buf[buf_index].size;
+		map = set->map[map_index];
+		length = map->buf[buf_index].size;
 
 		while (offset >= length) {
 			offset -= length;
@@ -246,7 +297,8 @@ static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
 				map_index++;
 				buf_index = 0;
 			}
-			length = mr->map[map_index]->buf[buf_index].size;
+			map = set->map[map_index];
+			length = map->buf[buf_index].size;
 		}
 
 		*m_out = map_index;
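Because RXE_BUF_PER_MAP is a power of two (the BUILD_BUG_ON above enforces it), the fast path decomposes an offset into map index, buffer index, and in-page offset with shifts and masks alone. A standalone worked example, assuming 4 KiB pages and 256 buffers per map (the real RXE_BUF_PER_MAP value follows from kernel struct sizes and may differ):

/* Illustration only: constants assumed, not taken from rxe headers. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12	/* 4 KiB pages */
#define BUF_PER_MAP	256	/* assumed power-of-two chunk size */

int main(void)
{
	uint64_t iova = 0x7f0000123456, set_iova = 0x7f0000000000;
	uint64_t set_offset = 0;	/* first page starts at offset 0 */
	uint64_t offset = iova - set_iova + set_offset;

	/* in-page offset first, then reduce to a page index */
	uint64_t page_off = offset & ((1 << PAGE_SHIFT) - 1);	/* 0x456 */
	offset >>= PAGE_SHIFT;					/* 0x123 */

	int n = (int)(offset & (BUF_PER_MAP - 1));	/* buf in map: 0x23 */
	int m = (int)(offset >> 8);	/* map index: 0x1 (ilog2(256) == 8) */

	printf("m=%d n=%d page_off=%#llx\n", m, n,
	       (unsigned long long)page_off);
	return 0;
}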
@@ -267,7 +319,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
 		goto out;
 	}
 
-	if (!mr->map) {
+	if (!mr->cur_map_set) {
 		addr = (void *)(uintptr_t)iova;
 		goto out;
 	}
@@ -280,13 +332,13 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
 
 	lookup_iova(mr, iova, &m, &n, &offset);
 
-	if (offset + length > mr->map[m]->buf[n].size) {
+	if (offset + length > mr->cur_map_set->map[m]->buf[n].size) {
 		pr_warn("crosses page boundary\n");
 		addr = NULL;
 		goto out;
 	}
 
-	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;
+	addr = (void *)(uintptr_t)mr->cur_map_set->map[m]->buf[n].addr + offset;
 
 out:
 	return addr;
@@ -322,7 +374,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		return 0;
 	}
 
-	WARN_ON_ONCE(!mr->map);
+	WARN_ON_ONCE(!mr->cur_map_set);
 
 	err = mr_check_range(mr, iova, length);
 	if (err) {
@@ -332,7 +384,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 
 	lookup_iova(mr, iova, &m, &i, &offset);
 
-	map = mr->map + m;
+	map = mr->cur_map_set->map + m;
 	buf = map[0]->buf + i;
 
 	while (length > 0) {
@@ -572,8 +624,9 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
 int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 {
 	struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
-	u32 key = wqe->wr.wr.reg.key;
+	u32 key = wqe->wr.wr.reg.key & 0xff;
 	u32 access = wqe->wr.wr.reg.access;
+	struct rxe_map_set *set;
 
 	/* user can only register MR in free state */
 	if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
@@ -589,19 +642,36 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 		return -EINVAL;
 	}
 
-	/* user is only allowed to change key portion of l/rkey */
-	if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
-		pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
-			__func__, key, mr->lkey);
-		return -EINVAL;
-	}
-
 	mr->access = access;
-	mr->lkey = key;
-	mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
-	mr->iova = wqe->wr.wr.reg.mr->iova;
+	mr->lkey = (mr->lkey & ~0xff) | key;
+	mr->rkey = (access & IB_ACCESS_REMOTE) ? mr->lkey : 0;
 	mr->state = RXE_MR_STATE_VALID;
 
+	set = mr->cur_map_set;
+	mr->cur_map_set = mr->next_map_set;
+	mr->cur_map_set->iova = wqe->wr.wr.reg.mr->iova;
+	mr->next_map_set = set;
+
+	return 0;
+}
+
+int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr)
+{
+	struct rxe_mr *mr = to_rmr(ibmr);
+	struct rxe_map_set *set = mr->next_map_set;
+	struct rxe_map *map;
+	struct rxe_phys_buf *buf;
+
+	if (unlikely(set->nbuf == mr->num_buf))
+		return -ENOMEM;
+
+	map = set->map[set->nbuf / RXE_BUF_PER_MAP];
+	buf = &map->buf[set->nbuf % RXE_BUF_PER_MAP];
+
+	buf->addr = addr;
+	buf->size = ibmr->page_size;
+	set->nbuf++;
+
 	return 0;
 }
 
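For context, the verbs-consumer sequence that the swapped tables protect looks roughly like this (a generic ULP sketch against the standard ib_map_mr_sg()/IB_WR_REG_MR API; the helper below is hypothetical, not part of this patch):

/* Generic fast-registration flow. With this patch, ib_map_mr_sg() fills
 * mr->next_map_set, so the table serving in-flight RDMA stays intact
 * until the IB_WR_REG_MR below executes and rxe_reg_fast_mr() swaps. */
static int register_fast_mr(struct ib_qp *qp, struct ib_mr *mr,
			    struct scatterlist *sg, int sg_nents)
{
	struct ib_reg_wr reg_wr = {};
	int n;

	n = ib_map_mr_sg(mr, sg, sg_nents, NULL, PAGE_SIZE);
	if (n != sg_nents)
		return n < 0 ? n : -EINVAL;

	reg_wr.wr.opcode = IB_WR_REG_MR;
	reg_wr.wr.send_flags = IB_SEND_SIGNALED;
	reg_wr.mr = mr;
	reg_wr.key = mr->lkey;
	reg_wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;

	return ib_post_send(qp, &reg_wr.wr, NULL);
}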
@@ -626,14 +696,12 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 void rxe_mr_cleanup(struct rxe_pool_entry *arg)
 {
 	struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
-	int i;
 
 	ib_umem_release(mr->umem);
 
-	if (mr->map) {
-		for (i = 0; i < mr->num_map; i++)
-			kfree(mr->map[i]);
+	if (mr->cur_map_set)
+		rxe_mr_free_map_set(mr->num_map, mr->cur_map_set);
 
-		kfree(mr->map);
-	}
+	if (mr->next_map_set)
+		rxe_mr_free_map_set(mr->num_map, mr->next_map_set);
 }

drivers/infiniband/sw/rxe/rxe_mw.c

Lines changed: 3 additions & 3 deletions

@@ -142,15 +142,15 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 
 	/* C10-75 */
 	if (mw->access & IB_ZERO_BASED) {
-		if (unlikely(wqe->wr.wr.mw.length > mr->length)) {
+		if (unlikely(wqe->wr.wr.mw.length > mr->cur_map_set->length)) {
 			pr_err_once(
 				"attempt to bind a ZB MW outside of the MR\n");
 			return -EINVAL;
 		}
 	} else {
-		if (unlikely((wqe->wr.wr.mw.addr < mr->iova) ||
+		if (unlikely((wqe->wr.wr.mw.addr < mr->cur_map_set->iova) ||
 			     ((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) >
-			      (mr->iova + mr->length)))) {
+			      (mr->cur_map_set->iova + mr->cur_map_set->length)))) {
 			pr_err_once(
 				"attempt to bind a VA MW outside of the MR\n");
 			return -EINVAL;