@@ -803,6 +803,7 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_b
803803{
804804 struct mca_btl_base_module_t * * possible_btls = NULL ;
805805 int comm_size = ompi_comm_size (comm );
806+ int comm_rank = ompi_comm_rank (comm );
806807 int rc = OMPI_SUCCESS , max_btls = 0 ;
807808 unsigned int selected_latency = INT_MAX ;
808809 struct mca_btl_base_module_t * selected_btl = NULL ;
@@ -842,10 +843,11 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_b
842843 return OMPI_SUCCESS ;
843844 }
844845
845- for (int i = 0 ; i < comm_size ; ++ i ) {
846- ompi_proc_t * proc = ompi_comm_peer_lookup (comm , i );
846+ for (int rank = 0 ; rank < comm_size ; ++ rank ) {
847+ ompi_proc_t * proc = ompi_comm_peer_lookup (comm , rank );
847848 mca_bml_base_endpoint_t * endpoint ;
848849 int num_btls , prev_max ;
850+ bool found_btl = false;
849851
850852 endpoint = mca_bml_base_get_endpoint (proc );
851853 if (NULL == endpoint ) {
@@ -891,23 +893,30 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_b
891893 for (int j = 0 ; j < max_btls ; ++ j ) {
892894 if (endpoint -> btl_rdma .bml_btls [i_btl ].btl == possible_btls [j ]) {
893895 ++ btl_counts [j ];
896+ found_btl = true;
894897 break ;
895898 } else if (NULL == possible_btls [j ]) {
896899 possible_btls [j ] = endpoint -> btl_rdma .bml_btls [i_btl ].btl ;
897900 btl_counts [j ] = 1 ;
901+ found_btl = true;
898902 break ;
899903 }
900904 }
901905 }
902906 }
907+
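908+ /* any non-local rank must have a usable btl. NOTE(review): the check below tests comm_rank == rank, i.e. the local rank, which contradicts this comment -- confirm whether comm_rank != rank was intended */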
909+ if (!found_btl && comm_rank == rank ) {
910+ /* no btl = no rdma/atomics */
911+ rc = OMPI_ERR_UNREACH ;
912+ break ;
913+ }
903914 }
904915
905916 if (OMPI_SUCCESS != rc ) {
906917 free (possible_btls );
907918 free (btl_counts );
908-
909- /* no btl = no rdma/atomics */
910- return OMPI_ERR_NOT_AVAILABLE ;
919+ return rc ;
911920 }
912921
913922 for (int i = 0 ; i < max_btls ; ++ i ) {
0 commit comments