@@ -45,18 +45,47 @@ typedef struct ompi_osc_rdma_pending_post_t ompi_osc_rdma_pending_post_t;
4545
4646static OBJ_CLASS_INSTANCE (ompi_osc_rdma_pending_post_t , opal_list_item_t , NULL, NULL) ;
4747
48+ static void ompi_osc_rdma_pending_op_construct (ompi_osc_rdma_pending_op_t * pending_op )
49+ {
50+ pending_op -> op_frag = NULL ;
51+ pending_op -> op_buffer = NULL ;
52+ pending_op -> op_result = NULL ;
53+ pending_op -> op_complete = false;
54+ }
55+
56+ static void ompi_osc_rdma_pending_op_destruct (ompi_osc_rdma_pending_op_t * pending_op )
57+ {
58+ if (NULL != pending_op -> op_frag ) {
59+ ompi_osc_rdma_frag_complete (pending_op -> op_frag );
60+ }
61+
62+ ompi_osc_rdma_pending_op_construct (pending_op );
63+ }
64+
/* class instance for pending operations: a list item that uses the
 * construct/destruct pair defined above */
OBJ_CLASS_INSTANCE(ompi_osc_rdma_pending_op_t, opal_list_item_t,
                   ompi_osc_rdma_pending_op_construct, ompi_osc_rdma_pending_op_destruct);
68+
4869/**
4970 * Dummy completion function for atomic operations
5071 */
5172void ompi_osc_rdma_atomic_complete (mca_btl_base_module_t * btl , struct mca_btl_base_endpoint_t * endpoint ,
5273 void * local_address , mca_btl_base_registration_handle_t * local_handle ,
5374 void * context , void * data , int status )
5475{
55- volatile bool * atomic_complete = (volatile bool * ) context ;
76+ ompi_osc_rdma_pending_op_t * pending_op = (ompi_osc_rdma_pending_op_t * ) context ;
5677
57- if (atomic_complete ) {
58- * atomic_complete = true ;
78+ if (pending_op -> op_result ) {
79+ memmove ( pending_op -> op_result , pending_op -> op_buffer , pending_op -> op_size ) ;
5980 }
81+
82+ if (NULL != pending_op -> op_frag ) {
83+ ompi_osc_rdma_frag_complete (pending_op -> op_frag );
84+ pending_op -> op_frag = NULL ;
85+ }
86+
87+ pending_op -> op_complete = true;
88+ OBJ_RELEASE (pending_op );
6089}
6190
6291/**
@@ -179,9 +208,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
179208 ompi_osc_rdma_peer_t * * peers ;
180209 int my_rank = ompi_comm_rank (module -> comm );
181210 ompi_osc_rdma_state_t * state = module -> state ;
182- volatile bool atomic_complete ;
183- ompi_osc_rdma_frag_t * frag ;
184- osc_rdma_counter_t * temp ;
185211 int ret ;
186212
187213 OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_TRACE , "post: %p, %d, %s" , (void * ) group , assert , win -> w_name );
@@ -209,9 +235,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
209235 state -> num_complete_msgs = 0 ;
210236 OPAL_THREAD_UNLOCK (& module -> lock );
211237
212- /* allocate a temporary buffer for atomic response */
213- ret = ompi_osc_rdma_frag_alloc (module , 8 , & frag , (char * * ) & temp );
214-
215238 if ((assert & MPI_MODE_NOCHECK ) || 0 == ompi_group_size (group )) {
216239 return OMPI_SUCCESS ;
217240 }
@@ -223,7 +246,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
223246 /* translate group ranks into the communicator */
224247 peers = ompi_osc_rdma_get_peers (module , module -> pw_group );
225248 if (OPAL_UNLIKELY (NULL == peers )) {
226- ompi_osc_rdma_frag_complete (frag );
227249 return OMPI_ERR_OUT_OF_RESOURCE ;
228250 }
229251
@@ -233,65 +255,40 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
233255 for (int i = 0 ; i < ompi_group_size (module -> pw_group ) ; ++ i ) {
234256 ompi_osc_rdma_peer_t * peer = peers [i ];
235257 uint64_t target = (uint64_t ) (intptr_t ) peer -> state + offsetof (ompi_osc_rdma_state_t , post_index );
236- int post_index ;
258+ ompi_osc_rdma_lock_t post_index ;
237259
238260 if (peer -> rank == my_rank ) {
239261 ompi_osc_rdma_handle_post (module , my_rank , NULL , 0 );
240262 continue ;
241263 }
242264
243265 /* get a post index */
244- atomic_complete = false;
245266 if (!ompi_osc_rdma_peer_local_state (peer )) {
246- do {
247- ret = module -> selected_btl -> btl_atomic_fop (module -> selected_btl , peer -> state_endpoint , temp , target , frag -> handle ,
248- peer -> state_handle , MCA_BTL_ATOMIC_ADD , 1 , 0 , MCA_BTL_NO_ORDER ,
249- ompi_osc_rdma_atomic_complete , (void * ) & atomic_complete , NULL );
250- assert (OPAL_SUCCESS >= ret );
251-
252- if (OMPI_SUCCESS == ret ) {
253- while (!atomic_complete ) {
254- ompi_osc_rdma_progress (module );
255- }
256-
257- break ;
258- }
259-
260- ompi_osc_rdma_progress (module );
261- } while (1 );
267+ ret = ompi_osc_rdma_lock_btl_fop (module , peer , target , MCA_BTL_ATOMIC_ADD , 1 , & post_index , true);
268+ assert (OMPI_SUCCESS == ret );
262269 } else {
263- * temp = ompi_osc_rdma_counter_add ((osc_rdma_counter_t * ) (intptr_t ) target , 1 ) - 1 ;
270+ post_index = ompi_osc_rdma_counter_add ((osc_rdma_counter_t * ) (intptr_t ) target , 1 ) - 1 ;
264271 }
265- post_index = (* temp ) & (OMPI_OSC_RDMA_POST_PEER_MAX - 1 );
272+
273+ post_index &= OMPI_OSC_RDMA_POST_PEER_MAX - 1 ;
266274
267275 target = (uint64_t ) (intptr_t ) peer -> state + offsetof (ompi_osc_rdma_state_t , post_peers ) +
268276 sizeof (osc_rdma_counter_t ) * post_index ;
269277
270278 do {
279+ ompi_osc_rdma_lock_t result ;
280+
271281 OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_TRACE , "attempting to post to index %d @ rank %d" , post_index , peer -> rank );
272282
273283 /* try to post. if the value isn't 0 then another rank is occupying this index */
274284 if (!ompi_osc_rdma_peer_local_state (peer )) {
275- atomic_complete = false;
276- ret = module -> selected_btl -> btl_atomic_cswap (module -> selected_btl , peer -> state_endpoint , temp , target , frag -> handle , peer -> state_handle ,
277- 0 , 1 + (int64_t ) my_rank , 0 , MCA_BTL_NO_ORDER , ompi_osc_rdma_atomic_complete ,
278- (void * ) & atomic_complete , NULL );
279- assert (OPAL_SUCCESS >= ret );
280-
281- if (OMPI_SUCCESS == ret ) {
282- while (!atomic_complete ) {
283- ompi_osc_rdma_progress (module );
284- }
285- } else {
286- ompi_osc_rdma_progress (module );
287- continue ;
288- }
289-
285+ ret = ompi_osc_rdma_lock_btl_cswap (module , peer , target , 0 , 1 + (int64_t ) my_rank , & result );
286+ assert (OMPI_SUCCESS == ret );
290287 } else {
291- * temp = !ompi_osc_rdma_lock_cmpset ((osc_rdma_counter_t * ) target , 0 , 1 + (osc_rdma_counter_t ) my_rank );
288+ result = !ompi_osc_rdma_lock_cmpset ((osc_rdma_counter_t * ) target , 0 , 1 + (osc_rdma_counter_t ) my_rank );
292289 }
293290
294- if (OPAL_LIKELY (0 == * temp )) {
291+ if (OPAL_LIKELY (0 == result )) {
295292 break ;
296293 }
297294
@@ -310,8 +307,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
310307 } while (1 );
311308 }
312309
313- ompi_osc_rdma_frag_complete (frag );
314-
315310 ompi_osc_rdma_release_peers (peers , ompi_group_size (module -> pw_group ));
316311
317312 OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_TRACE , "post complete" );
@@ -419,9 +414,7 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
419414{
420415 ompi_osc_rdma_module_t * module = GET_MODULE (win );
421416 ompi_osc_rdma_sync_t * sync = & module -> all_sync ;
422- ompi_osc_rdma_frag_t * frag = NULL ;
423417 ompi_osc_rdma_peer_t * * peers ;
424- void * scratch_lock = NULL ;
425418 ompi_group_t * group ;
426419 int group_size , ret ;
427420
@@ -456,45 +449,19 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
456449
457450 ompi_osc_rdma_sync_rdma_complete (sync );
458451
459- if (!(MCA_BTL_FLAGS_ATOMIC_OPS & module -> selected_btl -> btl_flags )) {
460- /* need a temporary buffer for performing fetching atomics */
461- ret = ompi_osc_rdma_frag_alloc (module , 8 , & frag , (char * * ) & scratch_lock );
462- if (OPAL_UNLIKELY (OPAL_SUCCESS != ret )) {
463- return ret ;
464- }
465- }
466-
467452 /* for each process in the group increment their number of complete messages */
468453 for (int i = 0 ; i < group_size ; ++ i ) {
469454 ompi_osc_rdma_peer_t * peer = peers [i ];
470455 intptr_t target = (intptr_t ) peer -> state + offsetof (ompi_osc_rdma_state_t , num_complete_msgs );
471456
472457 if (!ompi_osc_rdma_peer_local_state (peer )) {
473- do {
474- if (MCA_BTL_FLAGS_ATOMIC_OPS & module -> selected_btl -> btl_flags ) {
475- ret = module -> selected_btl -> btl_atomic_op (module -> selected_btl , peer -> state_endpoint , target , peer -> state_handle ,
476- MCA_BTL_ATOMIC_ADD , 1 , 0 , MCA_BTL_NO_ORDER ,
477- ompi_osc_rdma_atomic_complete , NULL , NULL );
478- } else {
479- /* don't care about the read value so use the scratch lock */
480- ret = module -> selected_btl -> btl_atomic_fop (module -> selected_btl , peer -> state_endpoint , scratch_lock ,
481- target , frag -> handle , peer -> state_handle , MCA_BTL_ATOMIC_ADD , 1 ,
482- 0 , MCA_BTL_NO_ORDER , ompi_osc_rdma_atomic_complete , NULL , NULL );
483- }
484-
485- if (OPAL_LIKELY (OMPI_SUCCESS == ret )) {
486- break ;
487- }
488- } while (1 );
458+ ret = ompi_osc_rdma_lock_btl_op (module , peer , target , MCA_BTL_ATOMIC_ADD , 1 , true);
459+ assert (OMPI_SUCCESS == ret );
489460 } else {
490461 (void ) ompi_osc_rdma_counter_add ((osc_rdma_counter_t * ) target , 1 );
491462 }
492463 }
493464
494- if (frag ) {
495- ompi_osc_rdma_frag_complete (frag );
496- }
497-
498465 /* release our reference to peers in this group */
499466 ompi_osc_rdma_release_peers (peers , group_size );
500467
0 commit comments