@@ -419,68 +419,6 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
 	return ret;
 }
 
-static struct mlx5_ib_mr *implicit_mr_get_data(struct mlx5_ib_mr *imr,
-					       u64 io_virt, size_t bcnt)
-{
-	struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
-	unsigned long end_idx = (io_virt + bcnt - 1) >> MLX5_IMR_MTT_SHIFT;
-	unsigned long idx = io_virt >> MLX5_IMR_MTT_SHIFT;
-	unsigned long inv_start_idx = end_idx + 1;
-	unsigned long inv_len = 0;
-	struct mlx5_ib_mr *result = NULL;
-	int ret;
-
-	lockdep_assert_held(&imr->dev->odp_srcu);
-
-	for (idx = idx; idx <= end_idx; idx++) {
-		struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx);
-
-		if (unlikely(!mtt)) {
-			mtt = implicit_get_child_mr(imr, idx);
-			if (IS_ERR(mtt)) {
-				result = mtt;
-				goto out;
-			}
-			inv_start_idx = min(inv_start_idx, idx);
-			inv_len = idx - inv_start_idx + 1;
-		}
-
-		/* Return first odp if region not covered by single one */
-		if (likely(!result))
-			result = mtt;
-	}
-
-	/*
-	 * Any time the implicit_children are changed we must perform an
-	 * update of the xlt before exiting to ensure the HW and the
-	 * implicit_children remains synchronized.
-	 */
-out:
-	if (likely(!inv_len))
-		return result;
-
-	/*
-	 * Notice this is not strictly ordered right, the KSM is updated after
-	 * the implicit_leaves is updated, so a parallel page fault could see
-	 * a MR that is not yet visible in the KSM. This is similar to a
-	 * parallel page fault seeing a MR that is being concurrently removed
-	 * from the KSM. Both of these improbable situations are resolved
-	 * safely by resuming the HW and then taking another page fault. The
-	 * next pagefault handler will see the new information.
-	 */
-	mutex_lock(&odp_imr->umem_mutex);
-	ret = mlx5_ib_update_xlt(imr, inv_start_idx, inv_len, 0,
-				 MLX5_IB_UPD_XLT_INDIRECT |
-				 MLX5_IB_UPD_XLT_ATOMIC);
-	mutex_unlock(&odp_imr->umem_mutex);
-	if (ret) {
-		mlx5_ib_err(to_mdev(imr->ibmr.pd->device),
-			    "Failed to update PAS\n");
-		return ERR_PTR(ret);
-	}
-	return result;
-}
-
 struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
 					     struct ib_udata *udata,
 					     int access_flags)
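
Note on the removed helper: implicit_mr_get_data() translated the faulting range [io_virt, io_virt + bcnt) into a span of child-MR indices by shifting with MLX5_IMR_MTT_SHIFT, then pre-instantiated every missing child before the caller's fault loop ran. The standalone sketch below is not part of the commit; it only illustrates that index arithmetic, and the 1 GiB child size is an assumed, illustrative stand-in for the real MLX5_IMR_MTT_SIZE defined in mlx5_ib.h.

/*
 * Standalone sketch of the index math used by the removed
 * implicit_mr_get_data(): the range [io_virt, io_virt + bcnt) is
 * covered by the child MRs whose indices fall in [idx, end_idx].
 * IMR_MTT_SHIFT is an illustrative stand-in for MLX5_IMR_MTT_SHIFT.
 */
#include <stdio.h>

#define IMR_MTT_SHIFT 30ULL			/* assume 1 GiB per child MR */
#define IMR_MTT_SIZE  (1ULL << IMR_MTT_SHIFT)

int main(void)
{
	unsigned long long io_virt = 3ULL * IMR_MTT_SIZE - 4096; /* straddles a child boundary */
	unsigned long long bcnt = 8192;

	unsigned long long idx = io_virt >> IMR_MTT_SHIFT;
	unsigned long long end_idx = (io_virt + bcnt - 1) >> IMR_MTT_SHIFT;

	/* Prints "children 2..3": the range touches two child MRs. */
	printf("children %llu..%llu\n", idx, end_idx);
	return 0;
}

A range that straddles a child boundary resolves to more than one index, which is why both the removed helper and its replacement walk the interval rather than handling a single child.
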
@@ -647,6 +585,84 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
 	return ret;
 }
 
+static int pagefault_implicit_mr(struct mlx5_ib_mr *imr,
+				 struct ib_umem_odp *odp_imr, u64 user_va,
+				 size_t bcnt, u32 *bytes_mapped, u32 flags)
+{
+	unsigned long end_idx = (user_va + bcnt - 1) >> MLX5_IMR_MTT_SHIFT;
+	unsigned long upd_start_idx = end_idx + 1;
+	unsigned long upd_len = 0;
+	unsigned long npages = 0;
+	int err;
+	int ret;
+
+	if (unlikely(user_va >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE ||
+		     mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - user_va < bcnt))
+		return -EFAULT;
+
+	/* Fault each child mr that intersects with our interval. */
+	while (bcnt) {
+		unsigned long idx = user_va >> MLX5_IMR_MTT_SHIFT;
+		struct ib_umem_odp *umem_odp;
+		struct mlx5_ib_mr *mtt;
+		u64 len;
+
+		mtt = xa_load(&imr->implicit_children, idx);
+		if (unlikely(!mtt)) {
+			mtt = implicit_get_child_mr(imr, idx);
+			if (IS_ERR(mtt)) {
+				ret = PTR_ERR(mtt);
+				goto out;
+			}
+			upd_start_idx = min(upd_start_idx, idx);
+			upd_len = idx - upd_start_idx + 1;
+		}
+
+		umem_odp = to_ib_umem_odp(mtt->umem);
+		len = min_t(u64, user_va + bcnt, ib_umem_end(umem_odp)) -
+		      user_va;
+
+		ret = pagefault_real_mr(mtt, umem_odp, user_va, len,
+					bytes_mapped, flags);
+		if (ret < 0)
+			goto out;
+		user_va += len;
+		bcnt -= len;
+		npages += ret;
+	}
+
+	ret = npages;
+
+	/*
+	 * Any time the implicit_children are changed we must perform an
+	 * update of the xlt before exiting to ensure the HW and the
+	 * implicit_children remains synchronized.
+	 */
+out:
+	if (likely(!upd_len))
+		return ret;
+
+	/*
+	 * Notice this is not strictly ordered right, the KSM is updated after
+	 * the implicit_children is updated, so a parallel page fault could
+	 * see a MR that is not yet visible in the KSM. This is similar to a
+	 * parallel page fault seeing a MR that is being concurrently removed
+	 * from the KSM. Both of these improbable situations are resolved
+	 * safely by resuming the HW and then taking another page fault. The
+	 * next pagefault handler will see the new information.
+	 */
+	mutex_lock(&odp_imr->umem_mutex);
+	err = mlx5_ib_update_xlt(imr, upd_start_idx, upd_len, 0,
+				 MLX5_IB_UPD_XLT_INDIRECT |
+				 MLX5_IB_UPD_XLT_ATOMIC);
+	mutex_unlock(&odp_imr->umem_mutex);
+	if (err) {
+		mlx5_ib_err(imr->dev, "Failed to update PAS\n");
+		return err;
+	}
+	return ret;
+}
+
 /*
  * Returns:
  *  -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are
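
Note on the new function: pagefault_implicit_mr() interleaves child lookup and faulting. Each pass clamps the slice to the end of the child umem that covers user_va, faults it through pagefault_real_mr(), and advances; only the XLT update for newly created children is deferred until the loop ends. The standalone sketch below is not part of the commit and shows just the clamp-and-advance walk; child_end() and fault_child() are hypothetical stand-ins for ib_umem_end() and pagefault_real_mr(), and the 1 GiB child size is again illustrative.

/*
 * Standalone sketch of the clamp-and-advance walk in
 * pagefault_implicit_mr(): each iteration handles at most the bytes
 * remaining in the child MR containing user_va, then moves on.
 */
#include <stdio.h>

#define CHILD_SHIFT 30ULL			/* illustrative child size */
#define CHILD_SIZE  (1ULL << CHILD_SHIFT)

/* End address of the child MR covering va (stand-in for ib_umem_end()). */
static unsigned long long child_end(unsigned long long va)
{
	return ((va >> CHILD_SHIFT) + 1) << CHILD_SHIFT;
}

/* Hypothetical per-child fault handler; here it only reports the slice. */
static long fault_child(unsigned long long va, unsigned long long len)
{
	printf("fault child %llu: va 0x%llx len 0x%llx\n",
	       va >> CHILD_SHIFT, va, len);
	return (long)(len / 4096);		/* pretend every page was mapped */
}

int main(void)
{
	unsigned long long user_va = 2 * CHILD_SIZE - 8192; /* crosses into child 2 */
	unsigned long long bcnt = 16384;
	long npages = 0;

	while (bcnt) {
		unsigned long long end = user_va + bcnt;

		if (end > child_end(user_va))
			end = child_end(user_va);	/* clamp to this child */

		npages += fault_child(user_va, end - user_va);
		bcnt -= end - user_va;
		user_va = end;
	}
	printf("mapped %ld pages\n", npages);
	return 0;
}

Because a missing child is created inside the same loop, the old two-phase scheme (populate everything, then fault) and its -EAGAIN retry when a leaf vanished in between are no longer needed.
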
@@ -660,8 +676,6 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
 			 u32 *bytes_mapped, u32 flags)
 {
 	struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
-	struct mlx5_ib_mr *mtt;
-	int npages = 0;
 
 	if (!odp->is_implicit_odp) {
 		if (unlikely(io_virt < ib_umem_start(odp) ||
@@ -670,48 +684,8 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
 		return pagefault_real_mr(mr, odp, io_virt, bcnt, bytes_mapped,
 					 flags);
 	}
-
-	if (unlikely(io_virt >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE ||
-		     mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - io_virt < bcnt))
-		return -EFAULT;
-
-	mtt = implicit_mr_get_data(mr, io_virt, bcnt);
-	if (IS_ERR(mtt))
-		return PTR_ERR(mtt);
-
-	/* Fault each child mr that intersects with our interval. */
-	while (bcnt) {
-		struct ib_umem_odp *umem_odp = to_ib_umem_odp(mtt->umem);
-		u64 end = min_t(u64, io_virt + bcnt, ib_umem_end(umem_odp));
-		u64 len = end - io_virt;
-		int ret;
-
-		ret = pagefault_real_mr(mtt, umem_odp, io_virt, len,
-					bytes_mapped, flags);
-		if (ret < 0)
-			return ret;
-		io_virt += len;
-		bcnt -= len;
-		npages += ret;
-
-		if (unlikely(bcnt)) {
-			mtt = xa_load(&mr->implicit_children,
-				      io_virt >> MLX5_IMR_MTT_SHIFT);
-
-			/*
-			 * implicit_mr_get_data sets up all the leaves, this
-			 * means they got invalidated before we got to them.
-			 */
-			if (!mtt) {
-				mlx5_ib_dbg(
-					mr->dev,
-					"next implicit leaf removed at 0x%llx.\n",
-					io_virt);
-				return -EAGAIN;
-			}
-		}
-	}
-	return npages;
+	return pagefault_implicit_mr(mr, odp, io_virt, bcnt, bytes_mapped,
+				     flags);
 }
 
 struct pf_frame {
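
Note on the dispatch: pagefault_mr() now only distinguishes real from implicit ODP MRs, and the range check against mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE moved into pagefault_implicit_mr(). That check is written as "limit - va < bcnt" rather than "va + bcnt > limit" so it stays correct even when va + bcnt would wrap around 64 bits. The standalone sketch below is not part of the commit and uses an illustrative limit value.

/*
 * Standalone sketch of the overflow-safe range check: "limit - va < bcnt"
 * rejects a request that runs past limit even when va + bcnt wraps, which
 * a naive "va + bcnt > limit" test on its own would miss.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool in_range(uint64_t va, uint64_t bcnt, uint64_t limit)
{
	return va < limit && limit - va >= bcnt;
}

int main(void)
{
	const uint64_t limit = 1ULL << 48;	/* illustrative address-space limit */

	printf("%d\n", in_range(4096, 8192, limit));		  /* 1: fits */
	printf("%d\n", in_range(limit - 4096, 8192, limit));	  /* 0: runs past limit */
	printf("%d\n", in_range(UINT64_MAX - 4096, 8192, limit)); /* 0: va + bcnt would wrap */
	return 0;
}
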