
Commit e73242a

EdwardSrouji authored and leon committed
RDMA/mlx5: Optimize DMABUF mkey page size
The current implementation of DMABUF memory registration uses a fixed
page size for the memory key (mkey), which can lead to suboptimal
performance when the underlying memory layout could support a larger
page size.

The optimization improves performance by reducing the number of page
table entries required for the mkey, leading to fewer MTT/KSM
descriptors that the HCA must go through to find translations, fewer
cache lines, and shorter UMR work requests on mkey updates, such as
when re-registering or reusing a cacheable mkey.

To ensure safe page size updates, the implementation uses a 5-step
process:

1. Make the first X entries non-present, where X is calculated to be
   minimal according to a large page shift that can be used to cover
   the MR length.
2. Update the page size to the large supported page size.
3. Load the remaining N-X entries according to the (optimized) page
   shift.
4. Update the page size according to the (optimized) page shift.
5. Load the first X entries with the correct translations.

This ensures that at no point is the MR accessible with a partially
updated translation table, maintaining correctness and preventing
access to stale or inconsistent mappings, such as an mkey advertising
the new page size while some of the underlying page table entries
still contain the old page size translations.

Signed-off-by: Edward Srouji <[email protected]>
Reviewed-by: Michael Guralnik <[email protected]>
Link: https://patch.msgid.link/bc05a6b2142c02f96a90635f9a4458ee4bbbf39f.1751979184.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <[email protected]>
1 parent fcfb035 commit e73242a
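The 5-step sequence in the commit message is essentially an ordering
argument: at every intermediate state, any entry the HCA can reach at
the currently advertised page size is either non-present or already
holds a translation for that size. Below is a minimal userspace C
sketch, not mlx5 driver code: struct fake_mkey, reachable() and check()
are hypothetical names invented for illustration, and the shifts are
arbitrary example values.

/* pgsz_order_sim.c - standalone model of the 5-step mkey page size
 * update described in the commit message. Hypothetical names only;
 * this is not mlx5 driver code.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define MR_LEN    (8UL << 12)   /* 32K MR, initially mapped as 8 x 4K */
#define OLD_SHIFT 12            /* starting mkey page size: 4K */
#define NEW_SHIFT 14            /* optimized page size: 16K */
#define BIG_SHIFT 15            /* a large supported size: 32K */
#define NENTRIES  8

struct fake_mkey {
        unsigned int page_shift;            /* size the mkey advertises */
        bool present[NENTRIES];
        unsigned int entry_shift[NENTRIES]; /* size each entry encodes */
};

/* Number of entries the HCA can reach at the advertised page size. */
static unsigned long reachable(const struct fake_mkey *mk)
{
        unsigned long n = (MR_LEN + (1UL << mk->page_shift) - 1) >>
                          mk->page_shift;

        return n < NENTRIES ? n : NENTRIES;
}

/* Invariant: no reachable entry mixes page sizes with the mkey. */
static void check(const struct fake_mkey *mk)
{
        for (unsigned long i = 0; i < reachable(mk); i++)
                assert(!mk->present[i] ||
                       mk->entry_shift[i] == mk->page_shift);
}

int main(void)
{
        struct fake_mkey mk = { .page_shift = OLD_SHIFT };
        unsigned long x, i;

        for (i = 0; i < NENTRIES; i++) {
                mk.present[i] = true;
                mk.entry_shift[i] = OLD_SHIFT;
        }
        check(&mk);

        /* X: minimal entry count covering MR_LEN at the large shift. */
        x = (MR_LEN + (1UL << BIG_SHIFT) - 1) >> BIG_SHIFT;

        /* Step 1: make the first X entries non-present. */
        for (i = 0; i < x; i++)
                mk.present[i] = false;
        check(&mk);

        /* Step 2: advertise the large page size; everything reachable
         * at this size is now non-present.
         */
        mk.page_shift = BIG_SHIFT;
        check(&mk);

        /* Step 3: load the remaining N-X entries at the optimized
         * shift; they sit beyond what BIG_SHIFT can reach.
         */
        for (i = x; i < NENTRIES; i++) {
                mk.present[i] = true;
                mk.entry_shift[i] = NEW_SHIFT;
        }
        check(&mk);

        /* Step 4: advertise the optimized page size. */
        mk.page_shift = NEW_SHIFT;
        check(&mk);

        /* Step 5: load the first X entries with correct translations. */
        for (i = 0; i < x; i++) {
                mk.present[i] = true;
                mk.entry_shift[i] = NEW_SHIFT;
        }
        check(&mk);

        printf("ok: no stale reachable entry at any step\n");
        return 0;
}

Reordering the steps (for example, raising the page size before
clearing the first X entries) makes the assertion fire, which is
exactly the inconsistent window the commit closes.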

File tree

4 files changed: +327 −43 lines changed


drivers/infiniband/hw/mlx5/mlx5_ib.h

Lines changed: 13 additions & 13 deletions
@@ -104,19 +104,6 @@ unsigned long __mlx5_umem_find_best_quantized_pgoff(
 		__mlx5_bit_sz(typ, page_offset_fld), 0, scale,	\
 		page_offset_quantized)
 
-static inline unsigned long
-mlx5_umem_dmabuf_find_best_pgsz(struct ib_umem_dmabuf *umem_dmabuf)
-{
-	/*
-	 * mkeys used for dmabuf are fixed at PAGE_SIZE because we must be able
-	 * to hold any sgl after a move operation. Ideally the mkc page size
-	 * could be changed at runtime to be optimal, but right now the driver
-	 * cannot do that.
-	 */
-	return ib_umem_find_best_pgsz(&umem_dmabuf->umem, PAGE_SIZE,
-				      umem_dmabuf->umem.iova);
-}
-
 enum {
 	MLX5_IB_MMAP_OFFSET_START = 9,
 	MLX5_IB_MMAP_OFFSET_END = 255,
@@ -352,6 +339,7 @@ struct mlx5_ib_flow_db {
 #define MLX5_IB_UPD_XLT_ACCESS BIT(5)
 #define MLX5_IB_UPD_XLT_INDIRECT BIT(6)
 #define MLX5_IB_UPD_XLT_DOWNGRADE BIT(7)
+#define MLX5_IB_UPD_XLT_KEEP_PGSZ BIT(8)
 
 /* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
  *
@@ -739,6 +727,8 @@ struct mlx5_ib_mr {
 			struct mlx5_ib_mr *dd_crossed_mr;
 			struct list_head dd_node;
 			u8 revoked :1;
+			/* Indicates previous dmabuf page fault occurred */
+			u8 dmabuf_faulted:1;
 			struct mlx5_ib_mkey null_mmkey;
 		};
 	};
@@ -1807,4 +1797,14 @@ mlx5_umem_mkc_find_best_pgsz(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 	return ib_umem_find_best_pgsz(umem, bitmap, iova);
 }
 
+static inline unsigned long
+mlx5_umem_dmabuf_find_best_pgsz(struct ib_umem_dmabuf *umem_dmabuf,
+				int access_mode)
+{
+	return mlx5_umem_mkc_find_best_pgsz(to_mdev(umem_dmabuf->umem.ibdev),
+					    &umem_dmabuf->umem,
+					    umem_dmabuf->umem.iova,
+					    access_mode);
+}
+
 #endif /* MLX5_IB_H */

drivers/infiniband/hw/mlx5/odp.c

Lines changed: 24 additions & 7 deletions
@@ -836,9 +836,13 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
 			       u32 *bytes_mapped, u32 flags)
 {
 	struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem);
+	int access_mode = mr->data_direct ? MLX5_MKC_ACCESS_MODE_KSM :
+					    MLX5_MKC_ACCESS_MODE_MTT;
+	unsigned int old_page_shift = mr->page_shift;
+	unsigned int page_shift;
+	unsigned long page_size;
 	u32 xlt_flags = 0;
 	int err;
-	unsigned long page_size;
 
 	if (flags & MLX5_PF_FLAGS_ENABLE)
 		xlt_flags |= MLX5_IB_UPD_XLT_ENABLE;
@@ -850,20 +854,33 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
 		return err;
 	}
 
-	page_size = mlx5_umem_dmabuf_find_best_pgsz(umem_dmabuf);
+	page_size = mlx5_umem_dmabuf_find_best_pgsz(umem_dmabuf, access_mode);
 	if (!page_size) {
 		ib_umem_dmabuf_unmap_pages(umem_dmabuf);
 		err = -EINVAL;
 	} else {
-		if (mr->data_direct)
-			err = mlx5r_umr_update_data_direct_ksm_pas(mr, xlt_flags);
-		else
-			err = mlx5r_umr_update_mr_pas(mr, xlt_flags);
+		page_shift = order_base_2(page_size);
+		if (page_shift != mr->page_shift && mr->dmabuf_faulted) {
+			err = mlx5r_umr_dmabuf_update_pgsz(mr, xlt_flags,
+							   page_shift);
+		} else {
+			mr->page_shift = page_shift;
+			if (mr->data_direct)
+				err = mlx5r_umr_update_data_direct_ksm_pas(
+					mr, xlt_flags);
+			else
+				err = mlx5r_umr_update_mr_pas(mr,
+							      xlt_flags);
+		}
 	}
 	dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
 
-	if (err)
+	if (err) {
+		mr->page_shift = old_page_shift;
 		return err;
+	}
+
+	mr->dmabuf_faulted = 1;
 
 	if (bytes_mapped)
 		*bytes_mapped += bcnt;
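The diff above introduces two refault paths: the staged page-size
update (mlx5r_umr_dmabuf_update_pgsz) runs only when an mkey that has
already faulted once must change its page size; a first fault can
program the translations directly. A small restatement of that branch
follows, where choose_path, DIRECT_UPDATE and STAGED_PGSZ_UPDATE are
invented names for illustration only.

/* fault_path.c - restates the decision added in pagefault_dmabuf_mr().
 * Hypothetical names; not driver code.
 */
#include <stdbool.h>
#include <stdio.h>

enum path { DIRECT_UPDATE, STAGED_PGSZ_UPDATE };

static enum path choose_path(bool dmabuf_faulted, unsigned int cur_shift,
			     unsigned int best_shift)
{
	/* Mirrors: page_shift != mr->page_shift && mr->dmabuf_faulted */
	if (best_shift != cur_shift && dmabuf_faulted)
		return STAGED_PGSZ_UPDATE;
	return DIRECT_UPDATE;
}

int main(void)
{
	printf("first fault:            %d\n", choose_path(false, 12, 21));
	printf("refault, same shift:    %d\n", choose_path(true, 21, 21));
	printf("refault, shift changed: %d\n", choose_path(true, 12, 21));
	return 0; /* prints 0, 0, 1 */
}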
