Skip to content

Commit 17611b8

Browse files
committed
oshmem: Add actual symmetric remote keys deduplication calls
Invoke the lookup call, but only for ranks on a different node, where scale is actually problematic. Signed-off-by: Thomas Vegas <[email protected]>
1 parent bee4412 commit 17611b8

File tree

1 file changed

+14
-4
lines changed

1 file changed

+14
-4
lines changed

oshmem/mca/spml/ucx/spml_ucx.c

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,7 @@ int mca_spml_ucx_ctx_mkey_add(mca_spml_ucx_ctx_t *ucx_ctx, int pe, uint32_t segn
373373
{
374374
int rc;
375375
ucs_status_t err;
376+
ucp_rkey_h rkey;
376377

377378
rc = mca_spml_ucx_ctx_mkey_new(ucx_ctx, pe, segno, ucx_mkey);
378379
if (OSHMEM_SUCCESS != rc) {
@@ -381,11 +382,18 @@ int mca_spml_ucx_ctx_mkey_add(mca_spml_ucx_ctx_t *ucx_ctx, int pe, uint32_t segn
381382
}
382383

383384
if (mkey->u.data) {
384-
err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[pe].ucp_conn, mkey->u.data, &((*ucx_mkey)->rkey));
385+
err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[pe].ucp_conn, mkey->u.data, &rkey);
385386
if (UCS_OK != err) {
386387
SPML_UCX_ERROR("failed to unpack rkey: %s", ucs_status_string(err));
387388
return OSHMEM_ERROR;
388389
}
390+
391+
if (!oshmem_proc_on_local_node(pe)) {
392+
rkey = mca_spml_ucx_rkey_store_get(&mca_spml_ucx.rkey_store, ucx_ctx->ucp_worker[0], rkey);
393+
}
394+
395+
(*ucx_mkey)->rkey = rkey;
396+
389397
rc = mca_spml_ucx_ctx_mkey_cache(ucx_ctx, mkey, segno, pe);
390398
if (OSHMEM_SUCCESS != rc) {
391399
SPML_UCX_ERROR("mca_spml_ucx_ctx_mkey_cache failed");
@@ -400,7 +408,11 @@ int mca_spml_ucx_ctx_mkey_del(mca_spml_ucx_ctx_t *ucx_ctx, int pe, uint32_t segn
400408
ucp_peer_t *ucp_peer;
401409
int rc;
402410
ucp_peer = &(ucx_ctx->ucp_peers[pe]);
403-
ucp_rkey_destroy(ucx_mkey->rkey);
411+
if (!oshmem_proc_on_local_node(pe)) {
412+
mca_spml_ucx_rkey_store_put(&mca_spml_ucx.rkey_store, ucx_mkey->rkey);
413+
} else {
414+
ucp_rkey_destroy(ucx_mkey->rkey);
415+
}
404416
ucx_mkey->rkey = NULL;
405417
rc = mca_spml_ucx_peer_mkey_cache_del(ucp_peer, segno);
406418
if(OSHMEM_SUCCESS != rc){
@@ -1809,5 +1821,3 @@ int mca_spml_ucx_team_reduce(shmem_team_t team, void
18091821
{
18101822
return OSHMEM_ERR_NOT_IMPLEMENTED;
18111823
}
1812-
1813-

0 commit comments

Comments
 (0)