Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.

Commit f38f325

Browse files
authored
Merge pull request #1326 from jsquyres/pr/usnic-thread-multiple
usnic updates: MPI_THREAD_MULTIPLE, libfabric v1.4, ...etc.
2 parents 5de8551 + 6c1ab96 commit f38f325

14 files changed

+267
-95
lines changed

opal/mca/btl/usnic/README.txt

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,3 +335,40 @@ libfabric abstractions:
335335
fi_fabric: corresponds to a VIC PF
336336
fi_domain: corresponds to a VIC VF
337337
fi_endpoint: resources inside the VIC VF (basically a QP)
338+
339+
======================================
340+
341+
MPI_THREAD_MULTIPLE support
342+
343+
In order to make the usnic BTL thread-safe, a mutex lock is used
344+
to protect the critical path, i.e., libfabric routines, bookkeeping, etc.
345+
346+
The lock in question is btl_usnic_lock. It is a RECURSIVE lock, meaning that
347+
the same thread can take the lock again even if it already has the lock to
348+
allow the callback function to post another segment right away if we know
349+
that the current segment is completed inline. (So we can call send in send
350+
without deadlocking)
351+
352+
The following two functions take care of hotel checkin/checkout, and we
353+
have to protect that part. So we take the mutex lock before we enter the
354+
function.
355+
356+
- opal_btl_usnic_check_rts()
357+
- opal_btl_usnic_handle_ack()
358+
359+
We also have to protect the calls to libfabric routines:
360+
361+
- opal_btl_usnic_endpoint_send_segment() (fi_send)
362+
- opal_btl_usnic_recv_call() (fi_recvmsg)
363+
364+
have to be protected as well.
365+
366+
Also, cclient connection checking (opal_btl_usnic_connectivity_ping) has to be
367+
protected. This happens only at the beginning, but cclient communicates with cagent
368+
through opal_fd_read/write() and if two or more clients do opal_fd_write() at the
369+
same time, the data might be corrupted.
370+
371+
With this concept, many functions in btl/usnic that make calls to the
372+
listed functions are protected by OPAL_THREAD_LOCK macro which will only
373+
be active if the user calls MPI_Init_thread() with MPI_THREAD_MULTIPLE
374+
support.

opal/mca/btl/usnic/btl_usnic.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ BEGIN_C_DECLS
5656
* at other times as needed or as tuning dictates.
5757
*/
5858
extern uint64_t opal_btl_usnic_ticks;
59+
60+
/* Lock for MPI_THREAD_MULTIPLE support */
61+
extern opal_recursive_mutex_t btl_usnic_lock;
62+
5963
static inline uint64_t
6064
get_nsec(void)
6165
{

opal/mca/btl/usnic/btl_usnic_cclient.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ int opal_btl_usnic_connectivity_listen(opal_btl_usnic_module_t *module)
197197
/* Ensure to NULL-terminate the passed strings */
198198
strncpy(cmd.nodename, opal_process_info.nodename,
199199
CONNECTIVITY_NODENAME_LEN - 1);
200-
strncpy(cmd.usnic_name, module->fabric_info->fabric_attr->name,
200+
strncpy(cmd.usnic_name, module->linux_device_name,
201201
CONNECTIVITY_IFNAME_LEN - 1);
202202

203203
if (OPAL_SUCCESS != opal_fd_write(agent_fd, sizeof(cmd), &cmd)) {
@@ -234,6 +234,9 @@ int opal_btl_usnic_connectivity_ping(uint32_t src_ipv4_addr, int src_port,
234234
return OPAL_SUCCESS;
235235
}
236236

237+
/* Protect opal_fd_write for multithreaded case */
238+
OPAL_THREAD_LOCK(&btl_usnic_lock);
239+
237240
/* Send the PING command */
238241
int id = CONNECTIVITY_AGENT_CMD_PING;
239242
if (OPAL_SUCCESS != opal_fd_write(agent_fd, sizeof(id), &id)) {
@@ -260,6 +263,9 @@ int opal_btl_usnic_connectivity_ping(uint32_t src_ipv4_addr, int src_port,
260263
/* Will not return */
261264
}
262265

266+
/* Unlock and return */
267+
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
268+
263269
return OPAL_SUCCESS;
264270
}
265271

opal/mca/btl/usnic/btl_usnic_compat.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved.
2+
* Copyright (c) 2014-2016 Cisco Systems, Inc. All rights reserved.
33
* Copyright (c) 2015 Intel, Inc. All rights reserved.
44
* $COPYRIGHT$
55
*
@@ -509,6 +509,7 @@ opal_btl_usnic_prepare_src(
509509
size_t* size,
510510
uint32_t flags)
511511
{
512+
OPAL_THREAD_LOCK(&btl_usnic_lock);
512513
opal_btl_usnic_module_t *module = (opal_btl_usnic_module_t*) base_module;
513514
opal_btl_usnic_send_frag_t *frag;
514515
uint32_t payload_len;
@@ -535,7 +536,7 @@ opal_btl_usnic_prepare_src(
535536

536537
#if MSGDEBUG2
537538
opal_output(0, "prep_src: %s %s frag %p, size=%d+%u (was %u), conv=%p\n",
538-
module->fabric_info->fabric_attr->name,
539+
module->linux_device_name,
539540
(reserve + *size) <= module->max_frag_payload?"small":"large",
540541
(void *)frag, (int)reserve, (unsigned)*size, (unsigned)osize,
541542
(void *)convertor);
@@ -552,6 +553,7 @@ opal_btl_usnic_prepare_src(
552553
#endif
553554
#endif
554555

556+
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
555557
return &frag->sf_base.uf_base;
556558
}
557559

@@ -721,7 +723,7 @@ opal_btl_usnic_prepare_src(struct mca_btl_base_module_t *base_module,
721723

722724
#if MSGDEBUG2
723725
opal_output(0, "prep_src: %s %s frag %p, size=%d+%u (was %u), conv=%p\n",
724-
module->fabric_info->fabric_attr->name,
726+
module->linux_device_name,
725727
(reserve + *size) <= module->max_frag_payload?"small":"large",
726728
(void *)frag, (int)reserve, (unsigned)*size, (unsigned)osize,
727729
(void *)convertor);

0 commit comments

Comments
 (0)