
Commit fd7dbf0

RDMA/odp: Make it clearer when a umem is an implicit ODP umem
Implicit ODP umems are special, they don't have any page lists, they don't
exist in the interval tree and they are never DMA mapped. Instead of trying
to guess this based on a zero length use an explicit flag.

Further, do not allow non-implicit umems to be 0 size.

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Leon Romanovsky <[email protected]>
Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent f993de8 commit fd7dbf0
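
For a reader skimming the diff, the change reduces to replacing an inferred property with a stored one. A minimal sketch of that contrast follows; the two helper names are illustrative only and are not part of this patch, while ib_umem_start(), ib_umem_end() and the new is_implicit_odp field are the real identifiers used in the hunks below.

#include <rdma/ib_umem_odp.h>

/*
 * Before: "implicit" had to be guessed from side effects of a 0-length umem
 * (the mlx5 hunks used the absence of page_list for the same purpose).
 */
static bool umem_is_implicit_guess(struct ib_umem_odp *umem_odp)
{
	return ib_umem_start(umem_odp) == ib_umem_end(umem_odp);
}

/* After: the property is recorded once in ib_umem_odp_get() and read directly. */
static bool umem_is_implicit_flag(struct ib_umem_odp *umem_odp)
{
	return umem_odp->is_implicit_odp;
}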

4 files changed, +40 −26 lines

drivers/infiniband/core/umem_odp.c (30 additions, 24 deletions)

@@ -176,18 +176,15 @@ static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
 	struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
 
 	down_write(&per_mm->umem_rwsem);
-	if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp))) {
-		/*
-		 * Note that the representation of the intervals in the
-		 * interval tree considers the ending point as contained in
-		 * the interval, while the function ib_umem_end returns the
-		 * first address which is not contained in the umem.
-		 */
-		umem_odp->interval_tree.start = ib_umem_start(umem_odp);
-		umem_odp->interval_tree.last = ib_umem_end(umem_odp) - 1;
-		interval_tree_insert(&umem_odp->interval_tree,
-				     &per_mm->umem_tree);
-	}
+	/*
+	 * Note that the representation of the intervals in the interval tree
+	 * considers the ending point as contained in the interval, while the
+	 * function ib_umem_end returns the first address which is not
+	 * contained in the umem.
+	 */
+	umem_odp->interval_tree.start = ib_umem_start(umem_odp);
+	umem_odp->interval_tree.last = ib_umem_end(umem_odp) - 1;
+	interval_tree_insert(&umem_odp->interval_tree, &per_mm->umem_tree);
 	up_write(&per_mm->umem_rwsem);
 }
 
@@ -196,11 +193,8 @@ static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
 	struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
 
 	down_write(&per_mm->umem_rwsem);
-	if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp)))
-		interval_tree_remove(&umem_odp->interval_tree,
-				     &per_mm->umem_tree);
+	interval_tree_remove(&umem_odp->interval_tree, &per_mm->umem_tree);
 	complete_all(&umem_odp->notifier_completion);
-
 	up_write(&per_mm->umem_rwsem);
 }
 
@@ -320,6 +314,9 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root,
 	int pages = size >> PAGE_SHIFT;
 	int ret;
 
+	if (!size)
+		return ERR_PTR(-EINVAL);
+
 	odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
 	if (!odp_data)
 		return ERR_PTR(-ENOMEM);
@@ -381,6 +378,9 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 	struct mm_struct *mm = umem->owning_mm;
 	int ret_val;
 
+	if (umem_odp->umem.address == 0 && umem_odp->umem.length == 0)
+		umem_odp->is_implicit_odp = 1;
+
 	umem_odp->page_shift = PAGE_SHIFT;
 	if (access & IB_ACCESS_HUGETLB) {
 		struct vm_area_struct *vma;
@@ -401,7 +401,10 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 
 	init_completion(&umem_odp->notifier_completion);
 
-	if (ib_umem_odp_num_pages(umem_odp)) {
+	if (!umem_odp->is_implicit_odp) {
+		if (!ib_umem_odp_num_pages(umem_odp))
+			return -EINVAL;
+
 		umem_odp->page_list =
 			vzalloc(array_size(sizeof(*umem_odp->page_list),
 					   ib_umem_odp_num_pages(umem_odp)));
@@ -420,7 +423,9 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 	ret_val = get_per_mm(umem_odp);
 	if (ret_val)
 		goto out_dma_list;
-	add_umem_to_per_mm(umem_odp);
+
+	if (!umem_odp->is_implicit_odp)
+		add_umem_to_per_mm(umem_odp);
 
 	return 0;
 
@@ -439,13 +444,14 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
 	 * It is the driver's responsibility to ensure, before calling us,
 	 * that the hardware will not attempt to access the MR any more.
 	 */
-	ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
-				    ib_umem_end(umem_odp));
-
-	remove_umem_from_per_mm(umem_odp);
+	if (!umem_odp->is_implicit_odp) {
+		ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
+					    ib_umem_end(umem_odp));
+		remove_umem_from_per_mm(umem_odp);
+		vfree(umem_odp->dma_list);
+		vfree(umem_odp->page_list);
+	}
 	put_per_mm(umem_odp);
-	vfree(umem_odp->dma_list);
-	vfree(umem_odp->page_list);
 }
 
 /*
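
Taken together, the ib_umem_odp_get() and ib_alloc_odp_umem() hunks above pin down three cases. The helper below is purely a restatement for readability (classify_odp_umem() and the enum are hypothetical, not part of the patch): an address/length pair of 0/0 becomes an implicit umem, any other zero-sized request is now rejected with -EINVAL, and everything else is a normal ODP umem that gets page lists and an interval tree entry. ib_alloc_odp_umem() likewise rejects size == 0 for child umems.

#include <linux/types.h>

enum odp_umem_kind {
	ODP_UMEM_IMPLICIT,	/* anchor only: no pages, no DMA, not in the interval tree */
	ODP_UMEM_NORMAL,	/* gets page_list/dma_list and an interval tree node */
	ODP_UMEM_INVALID,	/* zero-sized but not implicit: -EINVAL */
};

static enum odp_umem_kind classify_odp_umem(u64 address, u64 length)
{
	if (address == 0 && length == 0)
		return ODP_UMEM_IMPLICIT;
	if (length == 0)
		return ODP_UMEM_INVALID;
	return ODP_UMEM_NORMAL;
}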

drivers/infiniband/hw/mlx5/mr.c (1 addition, 1 deletion)

@@ -1600,7 +1600,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 		/* Wait for all running page-fault handlers to finish. */
 		synchronize_srcu(&dev->mr_srcu);
 		/* Destroy all page mappings */
-		if (umem_odp->page_list)
+		if (!umem_odp->is_implicit_odp)
 			mlx5_ib_invalidate_range(umem_odp,
 						 ib_umem_start(umem_odp),
 						 ib_umem_end(umem_odp));

drivers/infiniband/hw/mlx5/odp.c (1 addition, 1 deletion)

@@ -584,7 +584,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 	struct ib_umem_odp *odp;
 	size_t size;
 
-	if (!odp_mr->page_list) {
+	if (odp_mr->is_implicit_odp) {
 		odp = implicit_mr_get_data(mr, io_virt, bcnt);
 
 		if (IS_ERR(odp))
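
The two mlx5 hunks follow the same driver-side rule: stop keying off the presence of page_list and ask the umem directly. A hedged sketch of that pattern, using the real mlx5_ib_invalidate_range()/ib_umem_start()/ib_umem_end() calls from the dereg_mr() hunk inside a hypothetical wrapper:

/* Hypothetical wrapper; only the calls inside mirror the dereg_mr() hunk. */
static void mr_teardown_page_mappings(struct ib_umem_odp *umem_odp)
{
	/* An implicit parent umem never had page mappings to destroy. */
	if (umem_odp->is_implicit_odp)
		return;

	mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem_odp),
				 ib_umem_end(umem_odp));
}

The pagefault_mr() hunk is the mirror image: an implicit umem is the signal to look up (or create) the per-address child umem via implicit_mr_get_data().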

include/rdma/ib_umem_odp.h (8 additions, 0 deletions)

@@ -69,6 +69,14 @@ struct ib_umem_odp {
 	/* Tree tracking */
 	struct interval_tree_node interval_tree;
 
+	/*
+	 * An implicit odp umem cannot be DMA mapped, has 0 length, and serves
+	 * only as an anchor for the driver to hold onto the per_mm. FIXME:
+	 * This should be removed and drivers should work with the per_mm
+	 * directly.
+	 */
+	bool is_implicit_odp;
+
 	struct completion notifier_completion;
 	int dying;
 	unsigned int page_shift;