Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions opal/mca/btl/uct/btl_uct.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ struct mca_btl_uct_module_t {
/** array containing the am_tl and rdma_tl */
mca_btl_uct_tl_t *comm_tls[2];

#if UCT_API > UCT_VERSION(1, 7)
uct_component_h uct_component;
#endif

/** registration cache */
mca_rcache_base_module_t *rcache;

Expand Down
3 changes: 2 additions & 1 deletion opal/mca/btl/uct/btl_uct_am.c
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,8 @@ int mca_btl_uct_send_frag (mca_btl_uct_module_t *uct_btl, mca_btl_uct_base_frag_
if (!context->in_am_callback) {
mca_btl_uct_context_lock (context);
/* attempt to post the fragment */
if (NULL != frag->base.super.registration) {
if (NULL != frag->base.super.registration &&
(context->uct_iface_attr.cap.flags & UCT_IFACE_FLAG_AM_ZCOPY)) {
frag->comp.dev_context = context;
ucs_status = uct_ep_am_zcopy (ep_handle, MCA_BTL_UCT_FRAG, &frag->header, sizeof (frag->header),
&frag->uct_iov, 1, 0, &frag->comp.uct_comp);
Expand Down
4 changes: 2 additions & 2 deletions opal/mca/btl/uct/btl_uct_amo.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ int mca_btl_uct_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
mca_btl_uct_uct_completion_release (comp);
}

uct_rkey_release (&rkey);
mca_btl_uct_rkey_release (uct_btl, &rkey);

return rc;
}
Expand Down Expand Up @@ -184,7 +184,7 @@ int mca_btl_uct_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_e
mca_btl_uct_uct_completion_release (comp);
}

uct_rkey_release (&rkey);
mca_btl_uct_rkey_release (uct_btl, &rkey);

return rc;
}
82 changes: 79 additions & 3 deletions opal/mca/btl/uct/btl_uct_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,12 @@ ucs_status_t mca_btl_uct_am_handler (void *arg, void *data, size_t length, unsig
return UCS_OK;
}

#if UCT_API > UCT_VERSION(1, 7)
static int mca_btl_uct_component_process_uct_md (uct_component_h component, uct_md_resource_desc_t *md_desc,
char **allowed_ifaces)
#else
static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc, char **allowed_ifaces)
#endif
{
mca_rcache_base_resources_t rcache_resources;
uct_tl_resource_desc_t *tl_desc;
Expand Down Expand Up @@ -350,8 +355,14 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc

md = OBJ_NEW(mca_btl_uct_md_t);


#if UCT_API > UCT_VERSION(1, 7)
uct_md_config_read (component, NULL, NULL, &uct_config);
uct_md_open (component, md_desc->md_name, uct_config, &md->uct_md);
#else
uct_md_config_read (md_desc->md_name, NULL, NULL, &uct_config);
uct_md_open (md_desc->md_name, uct_config, &md->uct_md);
#endif
uct_config_release (uct_config);

uct_md_query (md->uct_md, &md_attr);
Expand All @@ -377,6 +388,10 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc
return OPAL_ERR_NOT_AVAILABLE;
}

#if UCT_API > UCT_VERSION(1, 7)
module->uct_component = component;
#endif

mca_btl_uct_component.modules[mca_btl_uct_component.module_count++] = module;

/* NTH: a registration cache shouldn't be necessary when using UCT but there are measurable
Expand All @@ -402,6 +417,42 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc
return OPAL_SUCCESS;
}

#if UCT_API > UCT_VERSION(1, 7)
static int mca_btl_uct_component_process_uct_component (uct_component_h component, char **allowed_ifaces)
{
uct_component_attr_t attr = {.field_mask = UCT_COMPONENT_ATTR_FIELD_NAME |
UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT};
ucs_status_t ucs_status;
int rc;

ucs_status = uct_component_query (component, &attr);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

need to initialize field_mask and allocate md_resources..
@hjelmn seems like you didn't try to run it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seemed to run and found components but I was probably getting lucky :). Will fix.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of curiosity... Why not allocate the md_resources inside uct_component_query. Calling twice seems a little silly to me.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

Copy link
Contributor

@yosefe yosefe Sep 27, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of curiosity... Why not allocate the md_resources inside uct_component_query. Calling twice seems a little silly to me.

If uct_component_query allocates resources, would need another API to release that memory, in the existing scheme the user is responsible for memory allocation and could use something like alloca() or std::vector which do not require explicit free

if (UCS_OK != ucs_status) {
return OPAL_ERROR;
}

BTL_VERBOSE(("processing uct component %s", attr.name));

attr.md_resources = calloc (attr.md_resource_count, sizeof (*attr.md_resources));
attr.field_mask |= UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES;
ucs_status = uct_component_query (component, &attr);
if (UCS_OK != ucs_status) {
return OPAL_ERROR;
}

for (int i = 0 ; i < attr.md_resource_count ; ++i) {
rc = mca_btl_uct_component_process_uct_md (component, attr.md_resources + i,
allowed_ifaces);
if (OPAL_SUCCESS != rc) {
break;
}
}

free (attr.md_resources);

return OPAL_SUCCESS;
}
#endif /* UCT_API > UCT_VERSION(1, 7) */

/*
* UCT component initialization:
* (1) read interface list from kernel and compare against component parameters
Expand All @@ -417,6 +468,7 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules,
struct mca_btl_base_module_t **base_modules;
uct_md_resource_desc_t *resources;
unsigned resource_count;
ucs_status_t ucs_status;
char **allowed_ifaces;
int rc;

Expand All @@ -433,10 +485,32 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules,
return NULL;
}

uct_query_md_resources (&resources, &resource_count);

mca_btl_uct_component.module_count = 0;

#if UCT_API > UCT_VERSION(1, 7)
uct_component_h *components;
unsigned num_components;

ucs_status = uct_query_components(&components, &num_components);
if (UCS_OK != ucs_status) {
BTL_ERROR(("could not query UCT components"));
return NULL;
}

/* generate all suitable btl modules */
for (unsigned i = 0 ; i < num_components ; ++i) {
rc = mca_btl_uct_component_process_uct_component (components[i], allowed_ifaces);
if (OPAL_SUCCESS != rc) {
break;
}
}

uct_release_component_list (components);

#else /* UCT 1.6 and older */

uct_query_md_resources (&resources, &resource_count);

/* generate all suitable btl modules */
for (unsigned i = 0 ; i < resource_count ; ++i) {
rc = mca_btl_uct_component_process_uct_md (resources + i, allowed_ifaces);
Expand All @@ -445,9 +519,11 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules,
}
}

opal_argv_free (allowed_ifaces);
uct_release_md_resource_list (resources);

#endif /* UCT_API > UCT_VERSION(1, 7) */

opal_argv_free (allowed_ifaces);
mca_btl_uct_modex_send ();

/* pass module array back to caller */
Expand Down
4 changes: 2 additions & 2 deletions opal/mca/btl/uct/btl_uct_rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ int mca_btl_uct_get (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi

BTL_VERBOSE(("get issued. status = %d", ucs_status));

uct_rkey_release (&rkey);
mca_btl_uct_rkey_release (uct_btl, &rkey);

return OPAL_LIKELY(UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERR_RESOURCE_BUSY;
}
Expand Down Expand Up @@ -237,7 +237,7 @@ int mca_btl_uct_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
mca_btl_uct_uct_completion_release (comp);
}

uct_rkey_release (&rkey);
mca_btl_uct_rkey_release (uct_btl, &rkey);

return OPAL_LIKELY(UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERR_RESOURCE_BUSY;
}
Expand Down
14 changes: 14 additions & 0 deletions opal/mca/btl/uct/btl_uct_rdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,22 @@ static inline int mca_btl_uct_get_rkey (mca_btl_uct_module_t *module,
return rc;
}

#if UCT_API > UCT_VERSION(1, 7)
ucs_status = uct_rkey_unpack (module->uct_component, (void *) remote_handle, rkey);
#else
ucs_status = uct_rkey_unpack ((void *) remote_handle, rkey);
#endif
return (UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERROR;
}

static inline void mca_btl_uct_rkey_release (mca_btl_uct_module_t *uct_btl, uct_rkey_bundle_t *rkey)
{
#if UCT_API > UCT_VERSION(1, 7)
uct_rkey_release (uct_btl->uct_component, rkey);
#else
(void) uct_btl;
uct_rkey_release (rkey);
#endif
}

#endif /* !defined(BTL_UCT_RDMA_H) */
16 changes: 14 additions & 2 deletions opal/mca/btl/uct/btl_uct_tl.c
Original file line number Diff line number Diff line change
Expand Up @@ -461,8 +461,14 @@ static void mca_btl_uct_set_tl_am (mca_btl_uct_module_t *module, mca_btl_uct_tl_
tl->max_device_contexts = mca_btl_uct_component.num_contexts_per_module;
}

module->super.btl_max_send_size = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.am.max_zcopy - sizeof (mca_btl_uct_am_header_t);
module->super.btl_eager_limit = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.am.max_bcopy - sizeof (mca_btl_uct_am_header_t);
module->super.btl_eager_limit = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.am.max_bcopy -
sizeof (mca_btl_uct_am_header_t);
if (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & UCT_IFACE_FLAG_AM_ZCOPY) {
module->super.btl_max_send_size = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.am.max_zcopy -
sizeof (mca_btl_uct_am_header_t);
} else {
module->super.btl_max_send_size = module->super.btl_eager_limit;
}
}

static int mca_btl_uct_set_tl_conn (mca_btl_uct_module_t *module, mca_btl_uct_tl_t *tl)
Expand Down Expand Up @@ -516,7 +522,13 @@ static int mca_btl_uct_evaluate_tl (mca_btl_uct_module_t *module, mca_btl_uct_tl
* come up with a better estimate. */

/* UCT bandwidth is in bytes/sec, BTL is in MB/sec */
#if UCT_API > UCT_VERSION(1, 7)
module->super.btl_bandwidth = (uint32_t) ((MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth.dedicated +
MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth.shared /
(opal_process_info.num_local_peers + 1)) / 1048576.0);
#else
module->super.btl_bandwidth = (uint32_t) (MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth / 1048576.0);
#endif
/* TODO -- figure out how to translate UCT latency to us */
module->super.btl_latency = 1;
}
Expand Down