Skip to content

Commit 1ca142e

Browse files
committed
oshmem: Add actual symmetric remote keys deduplication calls
Invoke the lookup call, but only for ranks on a different node, where scale is actually problematic. Signed-off-by: Thomas Vegas <[email protected]>
1 parent a415d03 commit 1ca142e

File tree

1 file changed

+14
-4
lines changed

1 file changed

+14
-4
lines changed

oshmem/mca/spml/ucx/spml_ucx.c

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,7 @@ int mca_spml_ucx_ctx_mkey_add(mca_spml_ucx_ctx_t *ucx_ctx, int pe, uint32_t segn
374374
{
375375
int rc;
376376
ucs_status_t err;
377+
ucp_rkey_h rkey;
377378

378379
rc = mca_spml_ucx_ctx_mkey_new(ucx_ctx, pe, segno, ucx_mkey);
379380
if (OSHMEM_SUCCESS != rc) {
@@ -382,11 +383,18 @@ int mca_spml_ucx_ctx_mkey_add(mca_spml_ucx_ctx_t *ucx_ctx, int pe, uint32_t segn
382383
}
383384

384385
if (mkey->u.data) {
385-
err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[pe].ucp_conn, mkey->u.data, &((*ucx_mkey)->rkey));
386+
err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[pe].ucp_conn, mkey->u.data, &rkey);
386387
if (UCS_OK != err) {
387388
SPML_UCX_ERROR("failed to unpack rkey: %s", ucs_status_string(err));
388389
return OSHMEM_ERROR;
389390
}
391+
392+
if (!oshmem_proc_on_local_node(pe)) {
393+
rkey = mca_spml_ucx_rkey_store_get(&mca_spml_ucx.rkey_store, ucx_ctx->ucp_worker[0], rkey);
394+
}
395+
396+
(*ucx_mkey)->rkey = rkey;
397+
390398
rc = mca_spml_ucx_ctx_mkey_cache(ucx_ctx, mkey, segno, pe);
391399
if (OSHMEM_SUCCESS != rc) {
392400
SPML_UCX_ERROR("mca_spml_ucx_ctx_mkey_cache failed");
@@ -401,7 +409,11 @@ int mca_spml_ucx_ctx_mkey_del(mca_spml_ucx_ctx_t *ucx_ctx, int pe, uint32_t segn
401409
ucp_peer_t *ucp_peer;
402410
int rc;
403411
ucp_peer = &(ucx_ctx->ucp_peers[pe]);
404-
ucp_rkey_destroy(ucx_mkey->rkey);
412+
if (!oshmem_proc_on_local_node(pe)) {
413+
mca_spml_ucx_rkey_store_put(&mca_spml_ucx.rkey_store, ucx_mkey->rkey);
414+
} else {
415+
ucp_rkey_destroy(ucx_mkey->rkey);
416+
}
405417
ucx_mkey->rkey = NULL;
406418
rc = mca_spml_ucx_peer_mkey_cache_del(ucp_peer, segno);
407419
if(OSHMEM_SUCCESS != rc){
@@ -1810,5 +1822,3 @@ int mca_spml_ucx_team_reduce(shmem_team_t team, void
18101822
{
18111823
return OSHMEM_ERR_NOT_IMPLEMENTED;
18121824
}
1813-
1814-

0 commit comments

Comments
 (0)