Skip to content

Commit c2559ff

Browse files
authored
Merge pull request #3783 from hjelmn/v2.x_osc_rdma_locks
osc/rdma: rework locking code to improve behavior of unlock
2 parents d22a538 + 6381658 commit c2559ff

File tree

3 files changed

+136
-130
lines changed

3 files changed

+136
-130
lines changed

ompi/mca/osc/rdma/osc_rdma_active_target.c

Lines changed: 46 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -45,18 +45,47 @@ typedef struct ompi_osc_rdma_pending_post_t ompi_osc_rdma_pending_post_t;
4545

4646
static OBJ_CLASS_INSTANCE(ompi_osc_rdma_pending_post_t, opal_list_item_t, NULL, NULL);
4747

48+
static void ompi_osc_rdma_pending_op_construct (ompi_osc_rdma_pending_op_t *pending_op)
49+
{
50+
pending_op->op_frag = NULL;
51+
pending_op->op_buffer = NULL;
52+
pending_op->op_result = NULL;
53+
pending_op->op_complete = false;
54+
}
55+
56+
static void ompi_osc_rdma_pending_op_destruct (ompi_osc_rdma_pending_op_t *pending_op)
57+
{
58+
if (NULL != pending_op->op_frag) {
59+
ompi_osc_rdma_frag_complete (pending_op->op_frag);
60+
}
61+
62+
ompi_osc_rdma_pending_op_construct (pending_op);
63+
}
64+
65+
OBJ_CLASS_INSTANCE(ompi_osc_rdma_pending_op_t, opal_list_item_t,
66+
ompi_osc_rdma_pending_op_construct,
67+
ompi_osc_rdma_pending_op_destruct);
68+
4869
/**
4970
* Dummy completion function for atomic operations
5071
*/
5172
void ompi_osc_rdma_atomic_complete (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
5273
void *local_address, mca_btl_base_registration_handle_t *local_handle,
5374
void *context, void *data, int status)
5475
{
55-
volatile bool *atomic_complete = (volatile bool *) context;
76+
ompi_osc_rdma_pending_op_t *pending_op = (ompi_osc_rdma_pending_op_t *) context;
5677

57-
if (atomic_complete) {
58-
*atomic_complete = true;
78+
if (pending_op->op_result) {
79+
memmove (pending_op->op_result, pending_op->op_buffer, pending_op->op_size);
5980
}
81+
82+
if (NULL != pending_op->op_frag) {
83+
ompi_osc_rdma_frag_complete (pending_op->op_frag);
84+
pending_op->op_frag = NULL;
85+
}
86+
87+
pending_op->op_complete = true;
88+
OBJ_RELEASE(pending_op);
6089
}
6190

6291
/**
@@ -179,9 +208,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
179208
ompi_osc_rdma_peer_t **peers;
180209
int my_rank = ompi_comm_rank (module->comm);
181210
ompi_osc_rdma_state_t *state = module->state;
182-
volatile bool atomic_complete;
183-
ompi_osc_rdma_frag_t *frag;
184-
osc_rdma_counter_t *temp;
185211
int ret;
186212

187213
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "post: %p, %d, %s", (void*) group, assert, win->w_name);
@@ -209,9 +235,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
209235
state->num_complete_msgs = 0;
210236
OPAL_THREAD_UNLOCK(&module->lock);
211237

212-
/* allocate a temporary buffer for atomic response */
213-
ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, (char **) &temp);
214-
215238
if ((assert & MPI_MODE_NOCHECK) || 0 == ompi_group_size (group)) {
216239
return OMPI_SUCCESS;
217240
}
@@ -223,7 +246,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
223246
/* translate group ranks into the communicator */
224247
peers = ompi_osc_rdma_get_peers (module, module->pw_group);
225248
if (OPAL_UNLIKELY(NULL == peers)) {
226-
ompi_osc_rdma_frag_complete (frag);
227249
return OMPI_ERR_OUT_OF_RESOURCE;
228250
}
229251

@@ -233,65 +255,40 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
233255
for (int i = 0 ; i < ompi_group_size(module->pw_group) ; ++i) {
234256
ompi_osc_rdma_peer_t *peer = peers[i];
235257
uint64_t target = (uint64_t) (intptr_t) peer->state + offsetof (ompi_osc_rdma_state_t, post_index);
236-
int post_index;
258+
ompi_osc_rdma_lock_t post_index;
237259

238260
if (peer->rank == my_rank) {
239261
ompi_osc_rdma_handle_post (module, my_rank, NULL, 0);
240262
continue;
241263
}
242264

243265
/* get a post index */
244-
atomic_complete = false;
245266
if (!ompi_osc_rdma_peer_local_state (peer)) {
246-
do {
247-
ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, temp, target, frag->handle,
248-
peer->state_handle, MCA_BTL_ATOMIC_ADD, 1, 0, MCA_BTL_NO_ORDER,
249-
ompi_osc_rdma_atomic_complete, (void *) &atomic_complete, NULL);
250-
assert (OPAL_SUCCESS >= ret);
251-
252-
if (OMPI_SUCCESS == ret) {
253-
while (!atomic_complete) {
254-
ompi_osc_rdma_progress (module);
255-
}
256-
257-
break;
258-
}
259-
260-
ompi_osc_rdma_progress (module);
261-
} while (1);
267+
ret = ompi_osc_rdma_lock_btl_fop (module, peer, target, MCA_BTL_ATOMIC_ADD, 1, &post_index, true);
268+
assert (OMPI_SUCCESS == ret);
262269
} else {
263-
*temp = ompi_osc_rdma_counter_add ((osc_rdma_counter_t *) (intptr_t) target, 1) - 1;
270+
post_index = ompi_osc_rdma_counter_add ((osc_rdma_counter_t *) (intptr_t) target, 1) - 1;
264271
}
265-
post_index = (*temp) & (OMPI_OSC_RDMA_POST_PEER_MAX - 1);
272+
273+
post_index &= OMPI_OSC_RDMA_POST_PEER_MAX - 1;
266274

267275
target = (uint64_t) (intptr_t) peer->state + offsetof (ompi_osc_rdma_state_t, post_peers) +
268276
sizeof (osc_rdma_counter_t) * post_index;
269277

270278
do {
279+
ompi_osc_rdma_lock_t result;
280+
271281
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attempting to post to index %d @ rank %d", post_index, peer->rank);
272282

273283
/* try to post. if the value isn't 0 then another rank is occupying this index */
274284
if (!ompi_osc_rdma_peer_local_state (peer)) {
275-
atomic_complete = false;
276-
ret = module->selected_btl->btl_atomic_cswap (module->selected_btl, peer->state_endpoint, temp, target, frag->handle, peer->state_handle,
277-
0, 1 + (int64_t) my_rank, 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete,
278-
(void *) &atomic_complete, NULL);
279-
assert (OPAL_SUCCESS >= ret);
280-
281-
if (OMPI_SUCCESS == ret) {
282-
while (!atomic_complete) {
283-
ompi_osc_rdma_progress (module);
284-
}
285-
} else {
286-
ompi_osc_rdma_progress (module);
287-
continue;
288-
}
289-
285+
ret = ompi_osc_rdma_lock_btl_cswap (module, peer, target, 0, 1 + (int64_t) my_rank, &result);
286+
assert (OMPI_SUCCESS == ret);
290287
} else {
291-
*temp = !ompi_osc_rdma_lock_cmpset ((osc_rdma_counter_t *) target, 0, 1 + (osc_rdma_counter_t) my_rank);
288+
result = !ompi_osc_rdma_lock_cmpset ((osc_rdma_counter_t *) target, 0, 1 + (osc_rdma_counter_t) my_rank);
292289
}
293290

294-
if (OPAL_LIKELY(0 == *temp)) {
291+
if (OPAL_LIKELY(0 == result)) {
295292
break;
296293
}
297294

@@ -310,8 +307,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
310307
} while (1);
311308
}
312309

313-
ompi_osc_rdma_frag_complete (frag);
314-
315310
ompi_osc_rdma_release_peers (peers, ompi_group_size(module->pw_group));
316311

317312
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "post complete");
@@ -419,9 +414,7 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
419414
{
420415
ompi_osc_rdma_module_t *module = GET_MODULE(win);
421416
ompi_osc_rdma_sync_t *sync = &module->all_sync;
422-
ompi_osc_rdma_frag_t *frag = NULL;
423417
ompi_osc_rdma_peer_t **peers;
424-
void *scratch_lock = NULL;
425418
ompi_group_t *group;
426419
int group_size, ret;
427420

@@ -456,45 +449,19 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
456449

457450
ompi_osc_rdma_sync_rdma_complete (sync);
458451

459-
if (!(MCA_BTL_FLAGS_ATOMIC_OPS & module->selected_btl->btl_flags)) {
460-
/* need a temporary buffer for performing fetching atomics */
461-
ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, (char **) &scratch_lock);
462-
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
463-
return ret;
464-
}
465-
}
466-
467452
/* for each process in the group increment their number of complete messages */
468453
for (int i = 0 ; i < group_size ; ++i) {
469454
ompi_osc_rdma_peer_t *peer = peers[i];
470455
intptr_t target = (intptr_t) peer->state + offsetof (ompi_osc_rdma_state_t, num_complete_msgs);
471456

472457
if (!ompi_osc_rdma_peer_local_state (peer)) {
473-
do {
474-
if (MCA_BTL_FLAGS_ATOMIC_OPS & module->selected_btl->btl_flags) {
475-
ret = module->selected_btl->btl_atomic_op (module->selected_btl, peer->state_endpoint, target, peer->state_handle,
476-
MCA_BTL_ATOMIC_ADD, 1, 0, MCA_BTL_NO_ORDER,
477-
ompi_osc_rdma_atomic_complete, NULL, NULL);
478-
} else {
479-
/* don't care about the read value so use the scratch lock */
480-
ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, scratch_lock,
481-
target, frag->handle, peer->state_handle, MCA_BTL_ATOMIC_ADD, 1,
482-
0, MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, NULL, NULL);
483-
}
484-
485-
if (OPAL_LIKELY(OMPI_SUCCESS == ret)) {
486-
break;
487-
}
488-
} while (1);
458+
ret = ompi_osc_rdma_lock_btl_op (module, peer, target, MCA_BTL_ATOMIC_ADD, 1, true);
459+
assert (OMPI_SUCCESS == ret);
489460
} else {
490461
(void) ompi_osc_rdma_counter_add ((osc_rdma_counter_t *) target, 1);
491462
}
492463
}
493464

494-
if (frag) {
495-
ompi_osc_rdma_frag_complete (frag);
496-
}
497-
498465
/* release our reference to peers in this group */
499466
ompi_osc_rdma_release_peers (peers, group_size);
500467

Comments: 0