Skip to content

Commit d151cf5

Browse files
committed
oshmem: Add actual symmetric remote keys deduplication calls
Invoke the lookup call, but only for ranks on a different node, where scale is actually problematic. Signed-off-by: Thomas Vegas <[email protected]>
1 parent c79f80b commit d151cf5

File tree

1 file changed

+14
-4
lines changed

1 file changed

+14
-4
lines changed

oshmem/mca/spml/ucx/spml_ucx.c

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,7 @@ int mca_spml_ucx_ctx_mkey_add(mca_spml_ucx_ctx_t *ucx_ctx, int pe, uint32_t segn
372372
{
373373
int rc;
374374
ucs_status_t err;
375+
ucp_rkey_h rkey;
375376

376377
rc = mca_spml_ucx_ctx_mkey_new(ucx_ctx, pe, segno, ucx_mkey);
377378
if (OSHMEM_SUCCESS != rc) {
@@ -380,11 +381,18 @@ int mca_spml_ucx_ctx_mkey_add(mca_spml_ucx_ctx_t *ucx_ctx, int pe, uint32_t segn
380381
}
381382

382383
if (mkey->u.data) {
383-
err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[pe].ucp_conn, mkey->u.data, &((*ucx_mkey)->rkey));
384+
err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[pe].ucp_conn, mkey->u.data, &rkey);
384385
if (UCS_OK != err) {
385386
SPML_UCX_ERROR("failed to unpack rkey: %s", ucs_status_string(err));
386387
return OSHMEM_ERROR;
387388
}
389+
390+
if (!oshmem_proc_on_local_node(pe)) {
391+
rkey = mca_spml_ucx_rkey_store_get(&mca_spml_ucx.rkey_store, rkey);
392+
}
393+
394+
(*ucx_mkey)->rkey = rkey;
395+
388396
rc = mca_spml_ucx_ctx_mkey_cache(ucx_ctx, mkey, segno, pe);
389397
if (OSHMEM_SUCCESS != rc) {
390398
SPML_UCX_ERROR("mca_spml_ucx_ctx_mkey_cache failed");
@@ -399,7 +407,11 @@ int mca_spml_ucx_ctx_mkey_del(mca_spml_ucx_ctx_t *ucx_ctx, int pe, uint32_t segn
399407
ucp_peer_t *ucp_peer;
400408
int rc;
401409
ucp_peer = &(ucx_ctx->ucp_peers[pe]);
402-
ucp_rkey_destroy(ucx_mkey->rkey);
410+
if (!oshmem_proc_on_local_node(pe)) {
411+
mca_spml_ucx_rkey_store_put(&mca_spml_ucx.rkey_store, ucx_mkey->rkey);
412+
} else {
413+
ucp_rkey_destroy(ucx_mkey->rkey);
414+
}
403415
ucx_mkey->rkey = NULL;
404416
rc = mca_spml_ucx_peer_mkey_cache_del(ucp_peer, segno);
405417
if(OSHMEM_SUCCESS != rc){
@@ -1808,5 +1820,3 @@ int mca_spml_ucx_team_reduce(shmem_team_t team, void
18081820
{
18091821
return OSHMEM_ERR_NOT_IMPLEMENTED;
18101822
}
1811-
1812-

0 commit comments

Comments
 (0)