diff --git a/ompi/interlib/interlib.c b/ompi/interlib/interlib.c index 2015f6ec6aa..7222a5c6e91 100644 --- a/ompi/interlib/interlib.c +++ b/ompi/interlib/interlib.c @@ -155,10 +155,9 @@ int ompi_interlib_declare(int threadlevel, char *version) } opal_list_append(&info, &kv->super); /* call pmix to initialize these values */ - if (OPAL_SUCCESS != (ret = opal_pmix.init(&info))) { - OPAL_LIST_DESTRUCT(&info); - return ret; - } + ret = opal_pmix.init(&info); OPAL_LIST_DESTRUCT(&info); - return OMPI_SUCCESS; + /* account for our refcount on pmix_init */ + opal_pmix.finalize(); + return ret; } diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index da8a406adb9..b06a32d7b97 100644 --- a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -277,9 +277,6 @@ int ompi_mpi_finalize(void) } } - /* account for our refcount on pmix_init */ - opal_pmix.finalize(); - /* check for timing request - get stop time and report elapsed time if so */ //OPAL_TIMING_DELTAS(ompi_enable_timing, &tm); diff --git a/opal/mca/pmix/base/base.h b/opal/mca/pmix/base/base.h index 4c499ff5d1d..5ca6241ce77 100644 --- a/opal/mca/pmix/base/base.h +++ b/opal/mca/pmix/base/base.h @@ -14,7 +14,7 @@ #include "opal_config.h" #include "opal/types.h" - +#include "opal/threads/threads.h" #include "opal/mca/mca.h" #include "opal/mca/base/mca_base_framework.h" @@ -55,13 +55,133 @@ OPAL_DECLSPEC int opal_pmix_base_exchange(opal_value_t *info, OPAL_DECLSPEC void opal_pmix_base_set_evbase(opal_event_base_t *evbase); +#define opal_pmix_condition_wait(a,b) pthread_cond_wait(a, &(b)->m_lock_pthread) +typedef pthread_cond_t opal_pmix_condition_t; +#define opal_pmix_condition_broadcast(a) pthread_cond_broadcast(a) +#define opal_pmix_condition_signal(a) pthread_cond_signal(a) +#define OPAL_PMIX_CONDITION_STATIC_INIT PTHREAD_COND_INITIALIZER + +typedef struct { + opal_mutex_t mutex; + opal_pmix_condition_t cond; + volatile bool active; +} opal_pmix_lock_t; + + typedef struct { opal_event_base_t *evbase; int timeout; + int initialized; + opal_pmix_lock_t lock; } opal_pmix_base_t; extern opal_pmix_base_t opal_pmix_base; +#define OPAL_PMIX_CONSTRUCT_LOCK(l) \ + do { \ + OBJ_CONSTRUCT(&(l)->mutex, opal_mutex_t); \ + pthread_cond_init(&(l)->cond, NULL); \ + (l)->active = true; \ + } while(0) + +#define OPAL_PMIX_DESTRUCT_LOCK(l) \ + do { \ + OBJ_DESTRUCT(&(l)->mutex); \ + pthread_cond_destroy(&(l)->cond); \ + } while(0) + + +#if OPAL_ENABLE_DEBUG +#define OPAL_PMIX_ACQUIRE_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + if (opal_debug_threads) { \ + opal_output(0, "Waiting for thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + while ((lck)->active) { \ + opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + if (opal_debug_threads) { \ + opal_output(0, "Thread obtained %s:%d", \ + __FILE__, __LINE__); \ + } \ + (lck)->active = true; \ + } while(0) +#else +#define OPAL_PMIX_ACQUIRE_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + (lck)->active = true; \ + } while(0) +#endif + + +#if OPAL_ENABLE_DEBUG +#define OPAL_PMIX_WAIT_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + if (opal_debug_threads) { \ + opal_output(0, "Waiting for thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + while ((lck)->active) { \ + opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + if (opal_debug_threads) { \ + opal_output(0, "Thread obtained %s:%d", \ + __FILE__, __LINE__); \ + 
} \ + OPAL_ACQUIRE_OBJECT(&lck); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#else +#define OPAL_PMIX_WAIT_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + OPAL_ACQUIRE_OBJECT(lck); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#endif + + +#if OPAL_ENABLE_DEBUG +#define OPAL_PMIX_RELEASE_THREAD(lck) \ + do { \ + if (opal_debug_threads) { \ + opal_output(0, "Releasing thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + (lck)->active = false; \ + opal_pmix_condition_broadcast(&(lck)->cond); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#else +#define OPAL_PMIX_RELEASE_THREAD(lck) \ + do { \ + assert(0 != opal_mutex_trylock(&(lck)->mutex)); \ + (lck)->active = false; \ + opal_pmix_condition_broadcast(&(lck)->cond); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#endif + + +#define OPAL_PMIX_WAKEUP_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + (lck)->active = false; \ + OPAL_POST_OBJECT(lck); \ + opal_pmix_condition_broadcast(&(lck)->cond); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) + END_C_DECLS #endif diff --git a/opal/mca/pmix/base/pmix_base_fns.c b/opal/mca/pmix/base/pmix_base_fns.c index 6577f680dfb..97be9c381d0 100644 --- a/opal/mca/pmix/base/pmix_base_fns.c +++ b/opal/mca/pmix/base/pmix_base_fns.c @@ -92,39 +92,6 @@ int opal_pmix_base_notify_event(int status, return OPAL_SUCCESS; } -struct lookup_caddy_t { - volatile bool active; - int status; - opal_pmix_pdata_t *pdat; -}; - -/******** DATA EXCHANGE ********/ -static void lookup_cbfunc(int status, opal_list_t *data, void *cbdata) -{ - struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata; - cd->status = status; - if (OPAL_SUCCESS == status && NULL != data) { - opal_pmix_pdata_t *p = (opal_pmix_pdata_t*)opal_list_get_first(data); - if (NULL != p) { - cd->pdat->proc = p->proc; - if (p->value.type == cd->pdat->value.type) { - if (NULL != cd->pdat->value.key) { - free(cd->pdat->value.key); - } - (void)opal_value_xfer(&cd->pdat->value, &p->value); - } - } - } - cd->active = false; -} - -static void opcbfunc(int status, void *cbdata) -{ - struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata; - cd->status = status; - cd->active = false; -} - int opal_pmix_base_exchange(opal_value_t *indat, opal_pmix_pdata_t *outdat, int timeout) @@ -133,8 +100,6 @@ int opal_pmix_base_exchange(opal_value_t *indat, opal_list_t ilist, mlist; opal_value_t *info; opal_pmix_pdata_t *pdat; - struct lookup_caddy_t caddy; - char **keys; /* protect the incoming value */ opal_dss.copy((void**)&info, indat, OPAL_VALUE); @@ -148,29 +113,10 @@ int opal_pmix_base_exchange(opal_value_t *indat, opal_list_append(&ilist, &info->super); /* publish it with "session" scope */ - if (NULL == opal_pmix.publish_nb) { - rc = opal_pmix.publish(&ilist); - OPAL_LIST_DESTRUCT(&ilist); - if (OPAL_SUCCESS != rc) { - return rc; - } - } else { - caddy.status = -1; - caddy.active = true; - caddy.pdat = NULL; - rc = opal_pmix.publish_nb(&ilist, opcbfunc, &caddy); - if (OPAL_SUCCESS != rc) { - OPAL_LIST_DESTRUCT(&ilist); - return rc; - } - while (caddy.active) { - usleep(10); - } - OPAL_LIST_DESTRUCT(&ilist); - if (OPAL_SUCCESS != caddy.status) { - OPAL_ERROR_LOG(caddy.status); - return caddy.status; - } + rc = opal_pmix.publish(&ilist); + OPAL_LIST_DESTRUCT(&ilist); + if (OPAL_SUCCESS != rc) { + return rc; } /* lookup the other side's info - if a non-blocking form @@ -204,43 +150,20 @@ int opal_pmix_base_exchange(opal_value_t 
*indat, /* if a non-blocking version of lookup isn't * available, then use the blocking version */ - if (NULL == opal_pmix.lookup_nb) { - OBJ_CONSTRUCT(&ilist, opal_list_t); - opal_list_append(&ilist, &pdat->super); - rc = opal_pmix.lookup(&ilist, &mlist); - OPAL_LIST_DESTRUCT(&mlist); + OBJ_CONSTRUCT(&ilist, opal_list_t); + opal_list_append(&ilist, &pdat->super); + rc = opal_pmix.lookup(&ilist, &mlist); + OPAL_LIST_DESTRUCT(&mlist); + if (OPAL_SUCCESS != rc) { OPAL_LIST_DESTRUCT(&ilist); - if (OPAL_SUCCESS != rc) { - return rc; - } - } else { - caddy.status = -1; - caddy.active = true; - caddy.pdat = pdat; - keys = NULL; - opal_argv_append_nosize(&keys, pdat->value.key); - rc = opal_pmix.lookup_nb(keys, &mlist, lookup_cbfunc, &caddy); - if (OPAL_SUCCESS != rc) { - OPAL_LIST_DESTRUCT(&mlist); - opal_argv_free(keys); - return rc; - } - while (caddy.active) { - usleep(10); - } - opal_argv_free(keys); - OPAL_LIST_DESTRUCT(&mlist); - if (OPAL_SUCCESS != caddy.status) { - OPAL_ERROR_LOG(caddy.status); - return caddy.status; - } + return rc; } /* pass back the result */ outdat->proc = pdat->proc; free(outdat->value.key); rc = opal_value_xfer(&outdat->value, &pdat->value); - OBJ_RELEASE(pdat); + OPAL_LIST_DESTRUCT(&ilist); return rc; } diff --git a/opal/mca/pmix/base/pmix_base_frame.c b/opal/mca/pmix/base/pmix_base_frame.c index f767391249c..eaec152edc9 100644 --- a/opal/mca/pmix/base/pmix_base_frame.c +++ b/opal/mca/pmix/base/pmix_base_frame.c @@ -13,6 +13,7 @@ #include "opal/constants.h" #include "opal/mca/mca.h" +#include "opal/threads/thread_usage.h" #include "opal/util/argv.h" #include "opal/util/output.h" #include "opal/mca/base/base.h" @@ -35,7 +36,16 @@ opal_pmix_base_module_t opal_pmix = { 0 }; bool opal_pmix_collect_all_data = true; int opal_pmix_verbose_output = -1; bool opal_pmix_base_async_modex = false; -opal_pmix_base_t opal_pmix_base = {0}; +opal_pmix_base_t opal_pmix_base = { + .evbase = NULL, + .timeout = 0, + .initialized = 0, + .lock = { + .mutex = OPAL_MUTEX_STATIC_INIT, + .cond = OPAL_PMIX_CONDITION_STATIC_INIT, + .active = false + } +}; static int opal_pmix_base_frame_register(mca_base_register_flag_t flags) { diff --git a/opal/mca/pmix/pmix.h b/opal/mca/pmix/pmix.h index a3940ae402e..7e7e13fda5d 100644 --- a/opal/mca/pmix/pmix.h +++ b/opal/mca/pmix/pmix.h @@ -146,7 +146,7 @@ extern int opal_pmix_base_exchange(opal_value_t *info, OPAL_NAME_PRINT(*(p)), (s))); \ OBJ_CONSTRUCT(&(_ilist), opal_list_t); \ _info = OBJ_NEW(opal_value_t); \ - _info->key = strdup(OPAL_PMIX_OPTIONAL); \ + _info->key = strdup(OPAL_PMIX_IMMEDIATE); \ _info->type = OPAL_BOOL; \ _info->data.flag = true; \ opal_list_append(&(_ilist), &(_info)->super); \ diff --git a/opal/mca/pmix/pmix2x/pmix/AUTHORS b/opal/mca/pmix/pmix2x/pmix/AUTHORS index c429d324c00..581a22ec73a 100644 --- a/opal/mca/pmix/pmix2x/pmix/AUTHORS +++ b/opal/mca/pmix/pmix2x/pmix/AUTHORS @@ -9,22 +9,31 @@ Email Name Affiliation(s) alinask Elena Shipunova Mellanox annu13 Annapurna Dasari Intel artpol84 Artem Polyakov Mellanox +ashleypittman Ashley Pittman Intel dsolt Dave Solt IBM +garlick Jim Garlick LLNL ggouaillardet Gilles Gouaillardet RIST hjelmn Nathan Hjelm LANL igor-ivanov Igor Ivanov Mellanox jladd-mlnx Joshua Ladd Mellanox -jsquyres Jeff Squyres Cisco, IU +jjhursey Joshua Hursey IBM +jsquyres Jeff Squyres Cisco +karasevb Boris Karasev Mellanox +kawashima-fj Takahiro Kawashima Fujitsu nkogteva Nadezhda Kogteva Mellanox -rhc54 Ralph Castain LANL, Cisco, Intel +nysal Nysal Jan KA IBM +PHHargrove Paul Hargrove LBNL +rhc54 Ralph Castain 
Intel ------------------------------- --------------------------- ------------------- Affiliation abbreviations: -------------------------- Cisco = Cisco Systems, Inc. +Fujitsu = Fujitsu IBM = International Business Machines, Inc. Intel = Intel, Inc. -IU = Indiana University LANL = Los Alamos National Laboratory +LBNL = Lawrence Berkeley National Laboratory +LLNL = Lawrence Livermore National Laboratory Mellanox = Mellanox RIST = Research Organization for Information Science and Technology diff --git a/opal/mca/pmix/pmix2x/pmix/INSTALL b/opal/mca/pmix/pmix2x/pmix/INSTALL index 005301463ff..e1fc5e3f6db 100644 --- a/opal/mca/pmix/pmix2x/pmix/INSTALL +++ b/opal/mca/pmix/pmix2x/pmix/INSTALL @@ -9,7 +9,7 @@ Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, Copyright (c) 2004-2005 The Regents of the University of California. All rights reserved. Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. -Copyright (c) 2013-2015 Intel, Inc. All rights reserved. +Copyright (c) 2013-2017 Intel, Inc. All rights reserved. $COPYRIGHT$ Additional copyrights may follow @@ -24,7 +24,7 @@ This file is a *very* short overview of building and installing the PMIx library. Much more information is available on the PMIx web site (e.g., see the FAQ section): - http://pmix.github.io/pmix/master + http://pmix.github.io/pmix/pmix Developer Builds @@ -34,7 +34,7 @@ If you have checked out a DEVELOPER'S COPY of PMIx (i.e., you checked out from Git), you should read the HACKING file before attempting to build PMIx. You must then run: -shell$ ./autogen.sh +shell$ ./autogen.pl You will need very recent versions of GNU Autoconf, Automake, and Libtool. If autogen.sh fails, read the HACKING file. If anything @@ -85,4 +85,3 @@ shell$ make install Parallel make is generally only helpful in the build phase; the installation process is mostly serial and does not benefit much from parallel make. - diff --git a/opal/mca/pmix/pmix2x/pmix/NEWS b/opal/mca/pmix/pmix2x/pmix/NEWS index 86f4438f1bb..4df8ad3aae6 100644 --- a/opal/mca/pmix/pmix2x/pmix/NEWS +++ b/opal/mca/pmix/pmix2x/pmix/NEWS @@ -24,6 +24,65 @@ current release as well as the "stable" bug fix release branch. Master (not on release branches yet) ------------------------------------ + +2.0.0 +------ +**** NOTE: This release implements the complete PMIX v2.0 Standard +**** and therefore includes a number of new APIs and features. These +**** can be tracked by their RFC's in the RFC repository at: +**** https://github.com/pmix/RFCs. A formal standards document will +**** be included in a later v2.x release. Some of the changes are +**** identified below. 
+- Added the Modular Component Architecture (MCA) plugin manager and + converted a number of operations to plugins, thereby allowing easy + customization and extension (including proprietary offerings) +- Added support for TCP sockets instead of Unix domain sockets for + client-server communications +- Added support for on-the-fly Allocation requests, including requests + for additional resources, extension of time for currently allocated + resources, and return of identified allocated resources to the scheduler + (RFC 0005 - https://github.com/pmix/RFCs/blob/master/RFC0005.md) +- Tightened rules on the processing of PMIx_Get requests, including + reservation of the "pmix" prefix for attribute keys and specifying + behaviors associated with the PMIX_RANK_WILDCARD value + (RFC 0009 - https://github.com/pmix/RFCs/blob/master/RFC0009.md) +- Extended support for tool interactions with a PMIx server aimed at + meeting the needs of debuggers and other tools. Includes support + for rendezvousing with a system-level PMIx server for interacting + with the system management stack (SMS) outside of an allocated + session, and adds two new APIs: + - PMIx_Query: request general information such as the process + table for a specified job, and available SMS capabilities + - PMIx_Log: log messages (e.g., application progress) to a + system-hosted persistent store + (RFC 0010 - https://github.com/pmix/RFCs/blob/master/RFC0010.md) +- Added support for fabric/network interactions associated with + "instant on" application startup + (RFC 0012 - https://github.com/pmix/RFCs/blob/master/RFC0012.md) +- Added an attribute to support getting the time remaining in an + allocation via the PMIx_Query interface + (RFC 0013 - https://github.com/pmix/RFCs/blob/master/RFC0013.md) +- Added interfaces to support job control and monitoring requests, + including heartbeat and file monitors to detect stalled applications. + Job control interface supports standard signal-related operations + (pause, kill, resume, etc.) as well as checkpoint/restart requests. + The interface can also be used by an application to indicate it is + willing to be pre-empted, with the host RM providing an event + notification when the preemption is desired. + (RFC 0015 - https://github.com/pmix/RFCs/blob/master/RFC0015.md) +- Extended the event notification system to support notifications + across threads in the same process, and the ability to direct + ordering of notifications when registering event handlers. + (RFC 0018 - https://github.com/pmix/RFCs/blob/master/RFC0018.md) +- Expose the buffer manipulation functions via a new set of APIs + to support heterogeneous data transfers within the host RM + environment + (RFC 0020 - https://github.com/pmix/RFCs/blob/master/RFC0020.md) +- Fix a number of race condition issues that arose at scale +- Enable PMIx servers to generate notifications to the host RM + and to themselves + + 1.2.2 -- 21 March 2017 ---------------------- - Compiler fix for Sun/Oracle CC (PR #322) diff --git a/opal/mca/pmix/pmix2x/pmix/VERSION b/opal/mca/pmix/pmix2x/pmix/VERSION index 53fe2266741..c3dd7d08258 100644 --- a/opal/mca/pmix/pmix2x/pmix/VERSION +++ b/opal/mca/pmix/pmix2x/pmix/VERSION @@ -30,7 +30,7 @@ greek= # command, or with the date (if "git describe" fails) in the form of # "date". 
-repo_rev=git071ebc3 +repo_rev=git6fb501d # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Jun 06, 2017" +date="Jun 19, 2017" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library @@ -75,4 +75,4 @@ date="Jun 06, 2017" # Version numbers are described in the Libtool current:revision:age # format. -libpmix_so_version=0:0:0 +libpmix_so_version=3:0:1 diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 index 395b78406fd..fe800619c6b 100644 --- a/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 @@ -167,6 +167,8 @@ AC_DEFUN([PMIX_SETUP_CORE],[ ############################################################################ pmix_show_title "Compiler and preprocessor tests" + PMIX_SETUP_CC + # # Check for some types # diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix_functions.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix_functions.m4 index 84c04741f6a..ce83b3b207b 100644 --- a/opal/mca/pmix/pmix2x/pmix/config/pmix_functions.m4 +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix_functions.m4 @@ -95,7 +95,7 @@ EOF # PMIX_CONFIGURE_USER="`whoami`" -PMIX_CONFIGURE_HOST="`hostname | head -n 1`" +PMIX_CONFIGURE_HOST="`(hostname || uname -n) 2> /dev/null | sed 1q`" PMIX_CONFIGURE_DATE="`date`" # @@ -115,7 +115,7 @@ AC_DEFUN([PMIX_BASIC_SETUP],[ # PMIX_CONFIGURE_USER="`whoami`" -PMIX_CONFIGURE_HOST="`hostname | head -n 1`" +PMIX_CONFIGURE_HOST="`(hostname || uname -n) 2> /dev/null | sed 1q`" PMIX_CONFIGURE_DATE="`date`" # diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h index 16e18e68ee7..e2cc36d8a3f 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h @@ -892,81 +892,83 @@ typedef struct pmix_value { } while (0) /* release the memory in the value struct data field */ -#define PMIX_VALUE_DESTRUCT(m) \ - do { \ - size_t _n; \ - if (PMIX_STRING == (m)->type) { \ - if (NULL != (m)->data.string) { \ - free((m)->data.string); \ - } \ - } else if ((PMIX_BYTE_OBJECT == (m)->type) || \ - (PMIX_COMPRESSED_STRING == (m)->type)) { \ - if (NULL != (m)->data.bo.bytes) { \ - free((m)->data.bo.bytes); \ - } \ - } else if (PMIX_DATA_ARRAY == (m)->type) { \ - if (PMIX_STRING == (m)->data.darray->type) { \ - char **_str = (char**)(m)->data.darray->array; \ - for (_n=0; _n < (m)->data.darray->size; _n++) { \ - if (NULL != _str[_n]) { \ - free(_str[_n]); \ - } \ - } \ - } else if (PMIX_PROC_INFO == (m)->data.darray->type) { \ - pmix_proc_info_t *_info = \ - (pmix_proc_info_t*)(m)->data.darray->array; \ - for (_n=0; _n < (m)->data.darray->size; _n++) { \ - PMIX_PROC_INFO_DESTRUCT(&_info[_n]); \ - } \ - } else if (PMIX_INFO == (m)->data.darray->type) { \ - pmix_info_t *_info = \ - (pmix_info_t*)(m)->data.darray->array; \ - for (_n=0; _n < (m)->data.darray->size; _n++) { \ - /* cannot use info destruct as that loops back */ \ - if (PMIX_STRING == _info[_n].value.type) { \ - if (NULL != _info[_n].value.data.string) { \ - free(_info[_n].value.data.string); \ - } \ - } else if (PMIX_BYTE_OBJECT == _info[_n].value.type) { \ - if (NULL != _info[_n].value.data.bo.bytes) { \ - free(_info[_n].value.data.bo.bytes); \ - } \ - } else if (PMIX_PROC_INFO == _info[_n].value.type) { \ - 
PMIX_PROC_INFO_DESTRUCT(_info[_n].value.data.pinfo); \ - } \ - } \ - } else if (PMIX_BYTE_OBJECT == (m)->data.darray->type) { \ - pmix_byte_object_t *_obj = \ - (pmix_byte_object_t*)(m)->data.darray->array; \ - for (_n=0; _n < (m)->data.darray->size; _n++) { \ - if (NULL != _obj[_n].bytes) { \ - free(_obj[_n].bytes); \ - } \ - } \ - } \ - if (NULL != (m)->data.darray->array) { \ - free((m)->data.darray->array); \ - } \ - free((m)->data.darray); \ - /**** DEPRECATED ****/ \ - } else if (PMIX_INFO_ARRAY == (m)->type) { \ - pmix_info_t *_p = (pmix_info_t*)((m)->data.array->array); \ - for (_n=0; _n < (m)->data.array->size; _n++) { \ - if (PMIX_STRING == _p[_n].value.type) { \ - if (NULL != _p[_n].value.data.string) { \ - free(_p[_n].value.data.string); \ - } \ - } else if (PMIX_BYTE_OBJECT == _p[_n].value.type) { \ - if (NULL != _p[_n].value.data.bo.bytes) { \ - free(_p[_n].value.data.bo.bytes); \ - } \ - } else if (PMIX_PROC_INFO == _p[_n].value.type) { \ - PMIX_PROC_INFO_DESTRUCT(_p[_n].value.data.pinfo); \ - } \ - } \ - free(_p); \ - /********************/ \ - } \ +#define PMIX_VALUE_DESTRUCT(m) \ + do { \ + size_t _n; \ + if (PMIX_STRING == (m)->type) { \ + if (NULL != (m)->data.string) { \ + free((m)->data.string); \ + } \ + } else if ((PMIX_BYTE_OBJECT == (m)->type) || \ + (PMIX_COMPRESSED_STRING == (m)->type)) { \ + if (NULL != (m)->data.bo.bytes) { \ + free((m)->data.bo.bytes); \ + } \ + } else if (PMIX_DATA_ARRAY == (m)->type) { \ + if (NULL != (m)->data.darray) { \ + if (PMIX_STRING == (m)->data.darray->type) { \ + char **_str = (char**)(m)->data.darray->array; \ + for (_n=0; _n < (m)->data.darray->size; _n++) { \ + if (NULL != _str[_n]) { \ + free(_str[_n]); \ + } \ + } \ + } else if (PMIX_PROC_INFO == (m)->data.darray->type) { \ + pmix_proc_info_t *_info = \ + (pmix_proc_info_t*)(m)->data.darray->array; \ + for (_n=0; _n < (m)->data.darray->size; _n++) { \ + PMIX_PROC_INFO_DESTRUCT(&_info[_n]); \ + } \ + } else if (PMIX_INFO == (m)->data.darray->type) { \ + pmix_info_t *_info = \ + (pmix_info_t*)(m)->data.darray->array; \ + for (_n=0; _n < (m)->data.darray->size; _n++) { \ + /* cannot use info destruct as that loops back */ \ + if (PMIX_STRING == _info[_n].value.type) { \ + if (NULL != _info[_n].value.data.string) { \ + free(_info[_n].value.data.string); \ + } \ + } else if (PMIX_BYTE_OBJECT == _info[_n].value.type) { \ + if (NULL != _info[_n].value.data.bo.bytes) { \ + free(_info[_n].value.data.bo.bytes); \ + } \ + } else if (PMIX_PROC_INFO == _info[_n].value.type) { \ + PMIX_PROC_INFO_DESTRUCT(_info[_n].value.data.pinfo); \ + } \ + } \ + } \ + } else if (PMIX_BYTE_OBJECT == (m)->data.darray->type) { \ + pmix_byte_object_t *_obj = \ + (pmix_byte_object_t*)(m)->data.darray->array; \ + for (_n=0; _n < (m)->data.darray->size; _n++) { \ + if (NULL != _obj[_n].bytes) { \ + free(_obj[_n].bytes); \ + } \ + } \ + } \ + if (NULL != (m)->data.darray->array) { \ + free((m)->data.darray->array); \ + } \ + free((m)->data.darray); \ + /**** DEPRECATED ****/ \ + } else if (PMIX_INFO_ARRAY == (m)->type) { \ + pmix_info_t *_p = (pmix_info_t*)((m)->data.array->array); \ + for (_n=0; _n < (m)->data.array->size; _n++) { \ + if (PMIX_STRING == _p[_n].value.type) { \ + if (NULL != _p[_n].value.data.string) { \ + free(_p[_n].value.data.string); \ + } \ + } else if (PMIX_BYTE_OBJECT == _p[_n].value.type) { \ + if (NULL != _p[_n].value.data.bo.bytes) { \ + free(_p[_n].value.data.bo.bytes); \ + } \ + } else if (PMIX_PROC_INFO == _p[_n].value.type) { \ + PMIX_PROC_INFO_DESTRUCT(_p[_n].value.data.pinfo); \ 
+ } \ + } \ + free(_p); \ + /********************/ \ + } \ } while (0) #define PMIX_VALUE_FREE(m, n) \ diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.h b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.h index 740da76ca10..da4d4ca2dd9 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.h +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -127,6 +127,7 @@ #include #endif /* HAVE_STDLIB_H */ +#include "src/threads/thread_usage.h" BEGIN_C_DECLS @@ -496,7 +497,7 @@ static inline pmix_object_t *pmix_obj_new(pmix_class_t * cls) static inline int pmix_obj_update(pmix_object_t *object, int inc) __pmix_attribute_always_inline__; static inline int pmix_obj_update(pmix_object_t *object, int inc) { - return object->obj_reference_count += inc; + return PMIX_THREAD_ADD32(&object->obj_reference_count, inc); } END_C_DECLS diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c index b1e9a6fe6fb..3bf71848cd4 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c @@ -167,20 +167,18 @@ static void pmix_client_notify_recv(struct pmix_peer_t *peer, } -pmix_client_globals_t pmix_client_globals = {{{0}}}; -pmix_mutex_t pmix_client_bootstrap_mutex = PMIX_MUTEX_STATIC_INIT; +pmix_client_globals_t pmix_client_globals = {0}; /* callback for wait completion */ static void wait_cbfunc(struct pmix_peer_t *pr, pmix_ptl_hdr_t *hdr, pmix_buffer_t *buf, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + pmix_lock_t *lock = (pmix_lock_t*)cbdata; pmix_output_verbose(2, pmix_globals.debug_output, "pmix:client wait_cbfunc received"); - PMIX_POST_OBJECT(active); - *active = false; + PMIX_WAKEUP_THREAD(lock); } /* callback to receive job info */ @@ -199,7 +197,7 @@ static void job_data(struct pmix_peer_t *pr, PMIX_ERROR_LOG(rc); cb->status = PMIX_ERROR; PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); return; } assert(NULL != nspace); @@ -211,7 +209,7 @@ static void job_data(struct pmix_peer_t *pr, #endif cb->status = PMIX_SUCCESS; PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } PMIX_EXPORT const char* PMIx_Get_version(void) @@ -219,7 +217,6 @@ PMIX_EXPORT const char* PMIx_Get_version(void) return pmix_version_string; } -volatile bool waiting_for_debugger = true; static void notification_fn(size_t evhdlr_registration_id, pmix_status_t status, const pmix_proc_t *source, @@ -228,18 +225,13 @@ static void notification_fn(size_t evhdlr_registration_id, pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) { + pmix_lock_t *reglock = (pmix_lock_t*)cbdata; + if (NULL != cbfunc) { cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); } - waiting_for_debugger = false; -} -static void evhandler_reg_callbk(pmix_status_t status, - size_t evhandler_ref, - void *cbdata) -{ - volatile int *active = (volatile int*)cbdata; - PMIX_POST_OBJECT(active); - *active = status; + PMIX_WAKEUP_THREAD(reglock); + } typedef struct { @@ -324,19 +316,19 @@ PMIX_EXPORT pmix_status_t 
PMIx_Init(pmix_proc_t *proc, pmix_cb_t cb; pmix_buffer_t *req; pmix_cmd_t cmd = PMIX_REQ_CMD; - volatile int active; pmix_status_t code = PMIX_ERR_DEBUGGER_RELEASE; pmix_proc_t wildcard; pmix_info_t ginfo; pmix_value_t *val = NULL; + pmix_lock_t reglock; if (NULL == proc) { return PMIX_ERR_BAD_PARAM; } - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); - if (0 < pmix_globals.init_cntr || PMIX_PROC_SERVER == pmix_globals.proc_type) { + if (0 < pmix_globals.init_cntr || PMIX_PROC_IS_SERVER) { /* since we have been called before, the nspace and * rank should be known. So return them here if * requested */ @@ -344,19 +336,19 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, (void)strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); proc->rank = pmix_globals.myid.rank; } + ++pmix_globals.init_cntr; /* we also need to check the info keys to see if something need * be done with them - e.g., to notify another library that we * also have called init */ + PMIX_RELEASE_THREAD(&pmix_global_lock); if (NULL != info) { _check_for_notify(info, ninfo); } - ++pmix_globals.init_cntr; - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return PMIX_SUCCESS; } /* if we don't see the required info, then we cannot init */ if (NULL == getenv("PMIX_NAMESPACE")) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INVALID_NAMESPACE; } @@ -365,13 +357,17 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, if (PMIX_SUCCESS != (rc = pmix_rte_init(PMIX_PROC_CLIENT, info, ninfo, pmix_client_notify_recv))) { PMIX_ERROR_LOG(rc); - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } /* setup the globals */ PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t); - PMIX_CONSTRUCT(&pmix_client_globals.myserver, pmix_peer_t); + pmix_client_globals.myserver = PMIX_NEW(pmix_peer_t); + if (NULL == pmix_client_globals.myserver) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_NOMEM; + } pmix_output_verbose(2, pmix_globals.debug_output, "pmix: init called"); @@ -379,7 +375,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, /* we require our nspace */ if (NULL == (evar = getenv("PMIX_NAMESPACE"))) { /* let the caller know that the server isn't available yet */ - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INVALID_NAMESPACE; } if (NULL != proc) { @@ -393,7 +389,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, /* we also require our rank */ if (NULL == (evar = getenv("PMIX_RANK"))) { /* let the caller know that the server isn't available yet */ - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_DATA_VALUE_NOT_FOUND; } pmix_globals.myid.rank = strtol(evar, NULL, 10); @@ -407,25 +403,27 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, * to us at launch */ evar = getenv("PMIX_SECURITY_MODE"); if (PMIX_SUCCESS != (rc = pmix_psec.assign_module(pmix_globals.mypeer, evar))) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* the server will be using the same */ - pmix_client_globals.myserver.compat.psec = pmix_globals.mypeer->compat.psec; + pmix_client_globals.myserver->compat.psec = pmix_globals.mypeer->compat.psec; /* setup the shared memory support */ #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) if (PMIX_SUCCESS 
!= (rc = pmix_dstore_init(NULL, 0))) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_DATA_VALUE_NOT_FOUND; } #endif /* PMIX_ENABLE_DSTORE */ /* connect to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.connect_to_peer(&pmix_client_globals.myserver, info, ninfo))){ - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + if (PMIX_SUCCESS != (rc = pmix_ptl.connect_to_peer(pmix_client_globals.myserver, info, ninfo))){ + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } + /* mark that we are using the same module as used for the server */ + pmix_globals.mypeer->compat.ptl = pmix_client_globals.myserver->compat.ptl; /* send a request for our job info - we do this as a non-blocking * transaction because some systems cannot handle very large @@ -434,28 +432,28 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(req, &cmd, 1, PMIX_CMD))) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(req); - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } /* send to the server */ PMIX_CONSTRUCT(&cb, pmix_cb_t); - cb.active = true; - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, req, job_data, (void*)&cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, req, job_data, (void*)&cb))){ PMIX_DESTRUCT(&cb); - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } /* wait for the data to return */ - PMIX_WAIT_FOR_COMPLETION(cb.active); + PMIX_WAIT_THREAD(&cb.lock); rc = cb.status; PMIX_DESTRUCT(&cb); if (PMIX_SUCCESS == rc) { pmix_globals.init_cntr++; } else { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* lood for a debugger attach key */ (void)strncpy(wildcard.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); @@ -464,18 +462,13 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, if (PMIX_SUCCESS == PMIx_Get(&wildcard, PMIX_DEBUG_STOP_IN_INIT, &ginfo, 1, &val)) { PMIX_VALUE_FREE(val, 1); // cleanup memory /* if the value was found, then we need to wait for debugger attach here */ - /* register for the debugger release notificaation */ - active = -1; + /* register for the debugger release notification */ + PMIX_CONSTRUCT_LOCK(®lock); PMIx_Register_event_handler(&code, 1, NULL, 0, - notification_fn, evhandler_reg_callbk, (void*)&active); - while (-1 == active) { - usleep(100); - } - if (0 != active) { - return active; - } + notification_fn, NULL, (void*)®lock); /* wait for it to arrive */ - PMIX_WAIT_FOR_COMPLETION(waiting_for_debugger); + PMIX_WAIT_THREAD(®lock); + PMIX_DESTRUCT_LOCK(®lock); } PMIX_INFO_DESTRUCT(&ginfo); @@ -484,46 +477,81 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, _check_for_notify(info, ninfo); } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); - return PMIX_SUCCESS; } PMIX_EXPORT int PMIx_Initialized(void) { - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (0 < pmix_globals.init_cntr) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return true; } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return false; } +typedef struct { + pmix_lock_t lock; + pmix_event_t ev; + bool active; +} pmix_client_timeout_t; + +/* timer callback */ +static void fin_timeout(int sd, short args, void 
*cbdata) +{ + pmix_client_timeout_t *tev; + tev = (pmix_client_timeout_t*)cbdata; + + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix:client finwait timeout fired"); + if (tev->active) { + tev->active = false; + PMIX_WAKEUP_THREAD(&tev->lock); + } +} +/* callback for finalize completion */ +static void finwait_cbfunc(struct pmix_peer_t *pr, + pmix_ptl_hdr_t *hdr, + pmix_buffer_t *buf, void *cbdata) +{ + pmix_client_timeout_t *tev; + tev = (pmix_client_timeout_t*)cbdata; + + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix:client finwait_cbfunc received"); + if (tev->active) { + tev->active = false; + PMIX_WAKEUP_THREAD(&tev->lock); + } +} + PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) { pmix_buffer_t *msg; pmix_cmd_t cmd = PMIX_FINALIZE_CMD; pmix_status_t rc; size_t n; - volatile bool active; + pmix_client_timeout_t tev; + struct timeval tv = {2, 0}; - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (1 != pmix_globals.init_cntr) { --pmix_globals.init_cntr; - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; } pmix_globals.init_cntr = 0; pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:client finalize called"); + "%s:%d pmix:client finalize called", + pmix_globals.myid.nspace, pmix_globals.myid.rank); /* mark that I called finalize */ pmix_globals.mypeer->finalized = true; - if ( 0 <= pmix_client_globals.myserver.sd ) { + if ( 0 <= pmix_client_globals.myserver->sd ) { /* check to see if we are supposed to execute a * blocking fence prior to actually finalizing */ if (NULL != info && 0 < ninfo) { @@ -544,7 +572,6 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) } } } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); /* setup a cmd message to notify the PMIx * server that we are normally terminating */ @@ -558,22 +585,33 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:client sending finalize sync to server"); - + "%s:%d pmix:client sending finalize sync to server", + pmix_globals.myid.nspace, pmix_globals.myid.rank); + + /* setup a timer to protect ourselves should the server be unable + * to answer for some reason */ + PMIX_CONSTRUCT_LOCK(&tev.lock); + pmix_event_assign(&tev.ev, pmix_globals.evbase, -1, 0, + fin_timeout, &tev); + tev.active = true; + PMIX_POST_OBJECT(&tev); + pmix_event_add(&tev.ev, &tv); /* send to the server */ - active = true;; - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, - wait_cbfunc, (void*)&active))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, + finwait_cbfunc, (void*)&tev))){ return rc; } /* wait for the ack to return */ - PMIX_WAIT_FOR_COMPLETION(active); + PMIX_WAIT_THREAD(&tev.lock); + PMIX_DESTRUCT_LOCK(&tev.lock); + if (tev.active) { + pmix_event_del(&tev.ev); + } + pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:client finalize sync received"); - } - else { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + "%s:%d pmix:client finalize sync received", + pmix_globals.myid.nspace, pmix_globals.myid.rank); } if (!pmix_globals.external_evbase) { @@ -584,8 +622,6 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) (void)pmix_progress_thread_pause(NULL); } - PMIX_DESTRUCT(&pmix_client_globals.myserver); - #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE 
== 1) if (0 > (rc = pmix_dstore_nspace_del(pmix_globals.myid.nspace))) { PMIX_ERROR_LOG(rc); @@ -595,11 +631,16 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) PMIX_LIST_DESTRUCT(&pmix_client_globals.pending_requests); - if (0 <= pmix_client_globals.myserver.sd) { - CLOSE_THE_SOCKET(pmix_client_globals.myserver.sd); + if (0 <= pmix_client_globals.myserver->sd) { + CLOSE_THE_SOCKET(pmix_client_globals.myserver->sd); } + if (NULL != pmix_client_globals.myserver) { + PMIX_RELEASE(pmix_client_globals.myserver); + } + pmix_rte_finalize(); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; } @@ -610,23 +651,23 @@ PMIX_EXPORT pmix_status_t PMIx_Abort(int flag, const char msg[], pmix_buffer_t *bfr; pmix_cmd_t cmd = PMIX_ABORT_CMD; pmix_status_t rc; - volatile bool active; + pmix_lock_t reglock; pmix_output_verbose(2, pmix_globals.debug_output, "pmix:client abort called"); - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a buffer to hold the message */ bfr = PMIX_NEW(pmix_buffer_t); @@ -664,14 +705,15 @@ PMIX_EXPORT pmix_status_t PMIx_Abort(int flag, const char msg[], } /* send to the server */ - active = true; - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, bfr, - wait_cbfunc, (void*)&active))){ + PMIX_CONSTRUCT_LOCK(®lock); + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, bfr, + wait_cbfunc, (void*)®lock))){ return rc; } /* wait for the release */ - PMIX_WAIT_FOR_COMPLETION(active); + PMIX_WAIT_THREAD(®lock); + PMIX_DESTRUCT_LOCK(®lock); return PMIX_SUCCESS; } @@ -766,7 +808,7 @@ static void _putfn(int sd, short args, void *cbdata) cb->pstatus = rc; /* post the data so the receiving thread can acquire it */ PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } PMIX_EXPORT pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_value_t *val) @@ -778,16 +820,15 @@ PMIX_EXPORT pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_va "pmix: executing put for key %s type %d", key, val->type); - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; cb->scope = scope; cb->key = (char*)key; cb->value = val; @@ -796,7 +837,7 @@ PMIX_EXPORT pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_va PMIX_THREADSHIFT(cb, _putfn); /* wait for the result */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->pstatus; PMIX_RELEASE(cb); @@ -854,8 +895,8 @@ static void _commitfn(int sd, short args, void *cbdata) /* always send, even if we have nothing to contribute, so the server knows * that we contributed whatever we had */ - if (PMIX_SUCCESS == (rc = 
pmix_ptl.send_recv(&pmix_client_globals.myserver, msgout, - wait_cbfunc, (void*)&cb->active))){ + if (PMIX_SUCCESS == (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msgout, + wait_cbfunc, (void*)&cb->lock))){ cb->pstatus = PMIX_SUCCESS; return; } @@ -864,7 +905,7 @@ static void _commitfn(int sd, short args, void *cbdata) cb->pstatus = rc; /* post the data so the receiving thread can acquire it */ PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } PMIX_EXPORT pmix_status_t PMIx_Commit(void) @@ -872,32 +913,30 @@ static void _commitfn(int sd, short args, void *cbdata) pmix_cb_t *cb; pmix_status_t rc; - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we are a server, or we aren't connected, don't attempt to send */ if (PMIX_PROC_SERVER == pmix_globals.proc_type) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; // not an error } if (!pmix_globals.connected) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; - /* pass this into the event library for thread protection */ PMIX_THREADSHIFT(cb, _commitfn); /* wait for the result */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->pstatus; PMIX_RELEASE(cb); @@ -974,7 +1013,7 @@ static void _peersfn(int sd, short args, void *cbdata) cb->pstatus = rc; /* post the data so the receiving thread can acquire it */ PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename, @@ -984,16 +1023,15 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename, pmix_cb_t *cb; pmix_status_t rc; - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; cb->key = (char*)nodename; if (NULL != nspace) { (void)strncpy(cb->nspace, nspace, PMIX_MAX_NSLEN); @@ -1003,7 +1041,7 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename, PMIX_THREADSHIFT(cb, _peersfn); /* wait for the result */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->pstatus; /* transfer the result */ *procs = cb->procs; @@ -1047,7 +1085,7 @@ static void _nodesfn(int sd, short args, void *cbdata) cb->pstatus = rc; /* post the data so the receiving thread can acquire it */ PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist) @@ -1055,16 +1093,15 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist pmix_cb_t *cb; pmix_status_t rc; - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + 
PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; if (NULL != nspace) { (void)strncpy(cb->nspace, nspace, PMIX_MAX_NSLEN); } @@ -1073,7 +1110,7 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist PMIX_THREADSHIFT(cb, _nodesfn); /* wait for the result */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->pstatus; *nodelist = cb->key; PMIX_RELEASE(cb); diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c index 957c8575ee5..50864d7fbc5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c @@ -70,23 +70,27 @@ PMIX_EXPORT pmix_status_t PMIx_Connect(const pmix_proc_t procs[], size_t nprocs, pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: connect called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; /* push the message into our event base to send to the server */ if (PMIX_SUCCESS != (rc = PMIx_Connect_nb(procs, nprocs, info, ninfo, op_cbfunc, cb))) { @@ -95,7 +99,7 @@ PMIX_EXPORT pmix_status_t PMIx_Connect(const pmix_proc_t procs[], size_t nprocs, } /* wait for the connect to complete */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; PMIX_RELEASE(cb); @@ -114,17 +118,22 @@ PMIX_EXPORT pmix_status_t PMIx_Connect_nb(const pmix_proc_t procs[], size_t npro pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: connect called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* check for bozo input */ if (NULL == procs || 0 >= nprocs) { @@ -170,7 +179,7 @@ PMIX_EXPORT pmix_status_t PMIx_Connect_nb(const pmix_proc_t procs[], size_t npro cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -179,25 +188,28 @@ PMIX_EXPORT pmix_status_t PMIx_Connect_nb(const pmix_proc_t procs[], size_t npro } PMIX_EXPORT pmix_status_t PMIx_Disconnect(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo) + const pmix_info_t info[], size_t ninfo) { pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if 
we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; if (PMIX_SUCCESS != (rc = PMIx_Disconnect_nb(procs, nprocs, info, ninfo, op_cbfunc, cb))) { PMIX_RELEASE(cb); @@ -205,7 +217,7 @@ PMIX_EXPORT pmix_status_t PMIx_Disconnect(const pmix_proc_t procs[], size_t npro } /* wait for the connect to complete */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; PMIX_RELEASE(cb); @@ -224,17 +236,22 @@ PMIX_EXPORT pmix_status_t PMIx_Disconnect_nb(const pmix_proc_t procs[], size_t n pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: disconnect called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* check for bozo input */ if (NULL == procs || 0 >= nprocs) { @@ -280,7 +297,7 @@ PMIX_EXPORT pmix_status_t PMIx_Disconnect_nb(const pmix_proc_t procs[], size_t n cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -347,5 +364,5 @@ static void op_cbfunc(pmix_status_t status, void *cbdata) cb->status = status; PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c index d22c1223d14..72ccdef2955 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . 
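/*
 * Reviewer note -- not part of the patch.  This hunk (and the many similar
 * ones in this series) replaces the old "volatile bool active" busy-wait
 * (PMIX_WAIT_FOR_COMPLETION + usleep) with a mutex/condition-variable lock
 * (pmix_lock_t, PMIX_CONSTRUCT_LOCK / PMIX_WAIT_THREAD / PMIX_WAKEUP_THREAD),
 * mirroring the OPAL_PMIX_* lock macros added in opal/mca/pmix/base/base.h
 * above.  Below is a minimal, self-contained sketch of that pattern using
 * only POSIX threads; the names (demo_lock_t, async_worker, ...) are
 * illustrative and are not PMIx APIs.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

typedef struct {
    pthread_mutex_t mutex;
    pthread_cond_t  cond;
    volatile bool   active;   /* true until the async work completes */
} demo_lock_t;

static void demo_lock_construct(demo_lock_t *l) {
    pthread_mutex_init(&l->mutex, NULL);
    pthread_cond_init(&l->cond, NULL);
    l->active = true;
}

static void demo_lock_wait(demo_lock_t *l) {
    /* sleep on the condition variable instead of spinning on "active" */
    pthread_mutex_lock(&l->mutex);
    while (l->active) {
        pthread_cond_wait(&l->cond, &l->mutex);
    }
    pthread_mutex_unlock(&l->mutex);
}

static void demo_lock_wakeup(demo_lock_t *l) {
    pthread_mutex_lock(&l->mutex);
    l->active = false;
    pthread_cond_broadcast(&l->cond);
    pthread_mutex_unlock(&l->mutex);
}

/* stands in for the server's reply callback (e.g., wait_cbfunc) */
static void *async_worker(void *cbdata) {
    demo_lock_t *lock = (demo_lock_t *)cbdata;
    sleep(1);                 /* pretend the request is in flight */
    demo_lock_wakeup(lock);   /* analogous to PMIX_WAKEUP_THREAD(lock) */
    return NULL;
}

int main(void) {
    demo_lock_t lock;
    pthread_t tid;

    demo_lock_construct(&lock);                 /* analogous to PMIX_CONSTRUCT_LOCK */
    pthread_create(&tid, NULL, async_worker, &lock);
    demo_lock_wait(&lock);                      /* analogous to PMIX_WAIT_THREAD */
    pthread_join(tid, NULL);
    printf("blocking call released by callback\n");
    return 0;
}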
@@ -66,28 +66,32 @@ static void wait_cbfunc(struct pmix_peer_t *pr, static void op_cbfunc(pmix_status_t status, void *cbdata); PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo) + const pmix_info_t info[], size_t ninfo) { pmix_cb_t *cb; pmix_status_t rc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: executing fence"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; /* push the message into our event base to send to the server */ if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(procs, nprocs, info, ninfo, @@ -97,7 +101,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs, } /* wait for the fence to complete */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; PMIX_RELEASE(cb); @@ -108,8 +112,8 @@ PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs, } PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata) + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_buffer_t *msg; pmix_cmd_t cmd = PMIX_FENCENB_CMD; @@ -118,17 +122,22 @@ PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs pmix_proc_t rg, *rgs; size_t nrg; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: fence_nb called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* check for bozo input */ if (NULL == procs && 0 != nprocs) { @@ -160,7 +169,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -252,6 +261,5 @@ static void op_cbfunc(pmix_status_t status, void *cbdata) pmix_cb_t *cb = (pmix_cb_t*)cbdata; cb->status = status; - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } - diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c index 16fbbda33fa..e0932889707 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c @@ -87,22 +87,25 @@ PMIX_EXPORT pmix_status_t PMIx_Get(const pmix_proc_t *proc, const char key[], pmix_cb_t *cb; pmix_status_t rc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + 
PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; if (PMIX_SUCCESS != (rc = PMIx_Get_nb(proc, key, info, ninfo, _value_cbfunc, cb))) { PMIX_RELEASE(cb); return rc; } /* wait for the data to return */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; *val = cb->value; PMIX_RELEASE(cb); @@ -121,9 +124,13 @@ PMIX_EXPORT pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key, int rank; char *nm; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* if the proc is NULL, then the caller is assuming * that the key is universally unique within the caller's @@ -169,7 +176,6 @@ PMIX_EXPORT pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key, /* thread-shift so we can check global objects */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; (void)strncpy(cb->nspace, nm, PMIX_MAX_NSLEN); cb->rank = rank; cb->key = (char*)key; @@ -195,12 +201,12 @@ static void _value_cbfunc(pmix_status_t status, pmix_value_t *kv, void *cbdata) } } PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } static pmix_buffer_t* _pack_get(char *nspace, pmix_rank_t rank, - const pmix_info_t info[], size_t ninfo, - pmix_cmd_t cmd) + const pmix_info_t info[], size_t ninfo, + pmix_cmd_t cmd) { pmix_buffer_t *msg; pmix_status_t rc; @@ -620,8 +626,8 @@ static void _getnbfn(int fd, short flags, void *cbdata) rc = pmix_dstore_fetch(cb->nspace, cb->rank, cb->key, &val); #endif if( PMIX_SUCCESS != rc && !my_nspace ){ - /* we are asking about the job-level info from other - * namespace. It seems tha we don't have it - go and + /* we are asking about the job-level info from another + * namespace. It seems that we don't have it - go and * ask server */ goto request; @@ -687,12 +693,12 @@ static void _getnbfn(int fd, short flags, void *cbdata) goto respond; } -request: + request: /* if we got here, then we don't have the data for this proc. 
If we * are a server, or we are a client and not connected, then there is * nothing more we can do */ - if (PMIX_PROC_SERVER == pmix_globals.proc_type || - (PMIX_PROC_SERVER != pmix_globals.proc_type && !pmix_globals.connected)) { + if (PMIX_PROC_IS_SERVER || + (!PMIX_PROC_IS_SERVER && !pmix_globals.connected)) { rc = PMIX_ERR_NOT_FOUND; goto respond; } @@ -700,13 +706,14 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* we also have to check the user's directives to see if they do not want * us to attempt to retrieve it from the server */ for (n=0; n < cb->ninfo; n++) { - if (0 == strcmp(cb->info[n].key, PMIX_OPTIONAL) && + if ((0 == strcmp(cb->info[n].key, PMIX_OPTIONAL) || (0 == strcmp(cb->info[n].key, PMIX_IMMEDIATE))) && (PMIX_UNDEF == cb->info[n].value.type || cb->info[n].value.data.flag)) { /* they don't want us to try and retrieve it */ pmix_output_verbose(2, pmix_globals.debug_output, "PMIx_Get key=%s for rank = %d, namespace = %s was not found - request was optional", cb->key, cb->rank, cb->nspace); rc = PMIX_ERR_NOT_FOUND; + val = NULL; goto respond; } } @@ -740,7 +747,7 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* track the callback object */ pmix_list_append(&pmix_client_globals.pending_requests, &cb->super); /* send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, _getnb_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, _getnb_cbfunc, (void*)cb))){ pmix_list_remove_item(&pmix_client_globals.pending_requests, &cb->super); rc = PMIX_ERROR; goto respond; @@ -775,5 +782,4 @@ static void _getnbfn(int fd, short flags, void *cbdata) } PMIX_RELEASE(cb); return; - } diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h index 4fdcf6c2b33..ecf979572c5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h @@ -20,14 +20,12 @@ BEGIN_C_DECLS typedef struct { - pmix_peer_t myserver; // messaging support to/from my server + pmix_peer_t *myserver; // messaging support to/from my server pmix_list_t pending_requests; // list of pmix_cb_t pending data requests } pmix_client_globals_t; PMIX_EXPORT extern pmix_client_globals_t pmix_client_globals; -PMIX_EXPORT extern pmix_mutex_t pmix_client_bootstrap_mutex; - END_C_DECLS #endif /* PMIX_CLIENT_OPS_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c index d6b0183ef92..6981c96e1e5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c @@ -72,21 +72,25 @@ PMIX_EXPORT pmix_status_t PMIx_Publish(const pmix_info_t info[], pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: publish called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object to let us know when it is done */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; if (PMIX_SUCCESS != (rc = PMIx_Publish_nb(info, ninfo, op_cbfunc, cb))) { PMIX_ERROR_LOG(rc); @@ -95,7 +99,7 @@ PMIX_EXPORT pmix_status_t PMIx_Publish(const 
pmix_info_t info[], } /* wait for the server to ack our request */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = (pmix_status_t)cb->status; PMIX_RELEASE(cb); @@ -110,17 +114,22 @@ PMIX_EXPORT pmix_status_t PMIx_Publish_nb(const pmix_info_t info[], size_t ninfo pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: publish called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* check for bozo cases */ if (NULL == info) { @@ -163,10 +172,9 @@ PMIX_EXPORT pmix_status_t PMIx_Publish_nb(const pmix_info_t info[], size_t ninfo cb = PMIX_NEW(pmix_cb_t); cb->op_cbfunc = cbfunc; cb->cbdata = cbdata; - cb->active = true; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -182,9 +190,23 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup(pmix_pdata_t pdata[], size_t ndata, char **keys = NULL; size_t i; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: lookup called"); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + + /* if we aren't connected, don't attempt to send */ + if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* bozo protection */ if (NULL == pdata) { return PMIX_ERR_BAD_PARAM; @@ -203,7 +225,6 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup(pmix_pdata_t pdata[], size_t ndata, cb = PMIX_NEW(pmix_cb_t); cb->cbdata = (void*)pdata; cb->nvals = ndata; - cb->active = true; if (PMIX_SUCCESS != (rc = PMIx_Lookup_nb(keys, info, ninfo, lookup_cbfunc, cb))) { @@ -213,7 +234,7 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup(pmix_pdata_t pdata[], size_t ndata, } /* wait for the server to ack our request */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); /* the data has been stored in the info array by lookup_cbfunc, so * nothing more for us to do */ @@ -232,13 +253,23 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup_nb(char **keys, pmix_cb_t *cb; size_t nkeys, n; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: lookup called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + /* if we aren't connected, don't attempt to send */ + if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* check for bozo cases */ if (NULL == keys) { return PMIX_ERR_BAD_PARAM; @@ -296,7 +327,7 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup_nb(char **keys, cb->cbdata = cbdata; /* send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_lookup_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_lookup_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -311,14 +342,27 @@ PMIX_EXPORT pmix_status_t 
PMIx_Unpublish(char **keys, pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: unpublish called"); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + + /* if we aren't connected, don't attempt to send */ + if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; /* push the message into our event base to send to the server */ if (PMIX_SUCCESS != (rc = PMIx_Unpublish_nb(keys, info, ninfo, op_cbfunc, cb))) { @@ -327,7 +371,7 @@ PMIX_EXPORT pmix_status_t PMIx_Unpublish(char **keys, } /* wait for the server to ack our request */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; PMIX_RELEASE(cb); @@ -344,13 +388,23 @@ PMIX_EXPORT pmix_status_t PMIx_Unpublish_nb(char **keys, pmix_cb_t *cb; size_t i, j; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: unpublish called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + /* if we aren't connected, don't attempt to send */ + if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* create the unpublish cmd */ msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ @@ -399,10 +453,9 @@ PMIX_EXPORT pmix_status_t PMIx_Unpublish_nb(char **keys, cb = PMIX_NEW(pmix_cb_t); cb->op_cbfunc = cbfunc; cb->cbdata = cbdata; - cb->active = true; /* send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -442,7 +495,7 @@ static void op_cbfunc(pmix_status_t status, void *cbdata) cb->status = status; PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } static void wait_lookup_cbfunc(struct pmix_peer_t *pr, @@ -539,5 +592,5 @@ static void lookup_cbfunc(pmix_status_t status, pmix_pdata_t pdata[], size_t nda } } PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c index e56387c8067..a7842c5ffb4 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c @@ -69,17 +69,23 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo, pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: spawn called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* ensure the nspace (if provided) is initialized */ if (NULL != nspace) { @@ -88,7 +94,6 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t 
ninfo, /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; if (PMIX_SUCCESS != (rc = PMIx_Spawn_nb(job_info, ninfo, apps, napps, spawn_cbfunc, cb))) { PMIX_RELEASE(cb); @@ -96,7 +101,7 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo, } /* wait for the result */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; if (NULL != nspace) { (void)strncpy(nspace, cb->nspace, PMIX_MAX_NSLEN); @@ -115,17 +120,22 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t nin pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: spawn called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ @@ -171,7 +181,7 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t nin cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -242,5 +252,5 @@ static void spawn_cbfunc(pmix_status_t status, char nspace[], void *cbdata) (void)strncpy(cb->nspace, nspace, PMIX_MAX_NSLEN); } PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c index b0f614b582b..cf2f546f777 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c @@ -21,6 +21,7 @@ #include #include +#include "src/threads/threads.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" @@ -101,13 +102,23 @@ PMIX_EXPORT pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_ pmix_status_t rc; pmix_query_caddy_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: job control called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + /* if we aren't connected, don't attempt to send */ + if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* if we are the server, then we just issue the request and * return the response */ if (PMIX_PROC_SERVER == pmix_globals.proc_type) { @@ -125,12 +136,6 @@ PMIX_EXPORT pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_ } /* if we are a client, then relay this request to the server */ - - /* if we aren't connected, don't attempt to send */ - if (!pmix_globals.connected) { - return PMIX_ERR_UNREACH; - } - msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { @@ -178,7 +183,7 @@ PMIX_EXPORT pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_ cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = 
pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -195,13 +200,23 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm pmix_status_t rc; pmix_query_caddy_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: monitor called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + /* if we aren't connected, don't attempt to send */ + if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* if we are the server, then we just issue the request and * return the response */ if (PMIX_PROC_SERVER == pmix_globals.proc_type) { @@ -217,12 +232,6 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm } /* if we are a client, then relay this request to the server */ - - /* if we aren't connected, don't attempt to send */ - if (!pmix_globals.connected) { - return PMIX_ERR_UNREACH; - } - msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { @@ -267,7 +276,7 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_log.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_log.c index 6fb39262a77..92ea30189ed 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_log.c +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_log.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
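The common/* hunks in this stretch of the patch also swap the open-coded comparisons against pmix_globals.proc_type for the PMIX_PROC_IS_SERVER / PMIX_PROC_IS_CLIENT / PMIX_PROC_IS_TOOL predicates, and hoist the "not connected" bail-out up beside the init check so both run under pmix_global_lock before any server-only short-circuit. The predicate definitions are not part of this patch; given the one-for-one substitutions visible here, they are presumably equivalent to the comparisons they replace (e.g. PMIX_PROC_SERVER == pmix_globals.proc_type). A hypothetical helper that factors out the guard sequence the patch open-codes at the top of each entry point would look roughly like this; the helper itself is not in the patch, only its body is.

static pmix_status_t example_entry_guard(void)
{
    PMIX_ACQUIRE_THREAD(&pmix_global_lock);
    if (pmix_globals.init_cntr <= 0) {
        /* the library has not been initialized */
        PMIX_RELEASE_THREAD(&pmix_global_lock);
        return PMIX_ERR_INIT;
    }
    if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) {
        /* clients and tools cannot relay a request without a server connection */
        PMIX_RELEASE_THREAD(&pmix_global_lock);
        return PMIX_ERR_UNREACH;
    }
    PMIX_RELEASE_THREAD(&pmix_global_lock);
    return PMIX_SUCCESS;
}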
@@ -21,6 +21,7 @@ #include #include +#include "src/threads/threads.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" @@ -61,13 +62,23 @@ PMIX_EXPORT pmix_status_t PMIx_Log_nb(const pmix_info_t data[], size_t ndata, pmix_buffer_t *msg; pmix_status_t rc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:log non-blocking"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + /* if we aren't connected, don't attempt to send */ + if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + if (0 == ndata || NULL == data) { return PMIX_ERR_BAD_PARAM; } @@ -126,7 +137,7 @@ PMIX_EXPORT pmix_status_t PMIx_Log_nb(const pmix_info_t data[], size_t ndata, pmix_output_verbose(2, pmix_globals.debug_output, "pmix:query sending to server"); - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, log_cbfunc, (void*)cd))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, log_cbfunc, (void*)cd))){ PMIX_RELEASE(cd); } } diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c index e4d772f821e..5eec3f79c74 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c @@ -21,6 +21,7 @@ #include #include +#include "src/threads/threads.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" @@ -101,13 +102,23 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque pmix_buffer_t *msg; pmix_status_t rc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:query non-blocking"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + /* if we aren't connected, don't attempt to send */ + if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + if (0 == nqueries || NULL == queries) { return PMIX_ERR_BAD_PARAM; } @@ -127,12 +138,6 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque rc = PMIX_SUCCESS; } else { /* if we are a client, then relay this request to the server */ - - /* if we aren't connected, don't attempt to send */ - if (!pmix_globals.connected) { - return PMIX_ERR_UNREACH; - } - cd = PMIX_NEW(pmix_query_caddy_t); cd->cbfunc = cbfunc; cd->cbdata = cbdata; @@ -157,7 +162,7 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque } pmix_output_verbose(2, pmix_globals.debug_output, "pmix:query sending to server"); - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cd))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cd))){ PMIX_RELEASE(cd); } } @@ -240,7 +245,7 @@ PMIX_EXPORT pmix_status_t PMIx_Allocation_request_nb(pmix_alloc_directive_t dire cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); 
PMIX_RELEASE(cb); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h index 55f3fac311f..715289f5038 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h @@ -165,6 +165,7 @@ void pmix_event_timeout_cb(int fd, short flags, void *arg); ch->timer_active = true; \ pmix_event_assign(&ch->ev, pmix_globals.evbase, -1, 0, \ pmix_event_timeout_cb, ch); \ + PMIX_POST_OBJECT(ch); \ pmix_event_add(&ch->ev, &pmix_globals.event_window); \ } else { \ /* add this peer to the array of sources */ \ @@ -183,6 +184,7 @@ void pmix_event_timeout_cb(int fd, short flags, void *arg); ch->ninfo = ninfo; \ /* reset the timer */ \ pmix_event_del(&ch->ev); \ + PMIX_POST_OBJECT(ch); \ pmix_event_add(&ch->ev, &pmix_globals.event_window); \ } \ } while(0) diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c index 1a2b82eedd5..27b1ed78260 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c @@ -18,6 +18,7 @@ #include #include +#include "src/threads/threads.h" #include "src/util/error.h" #include "src/util/output.h" @@ -44,7 +45,22 @@ PMIX_EXPORT pmix_status_t PMIx_Notify_event(pmix_status_t status, { int rc; - if (PMIX_PROC_SERVER == pmix_globals.proc_type) { + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + + /* if we aren't connected, don't attempt to send */ + if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + + + if (PMIX_PROC_IS_SERVER) { rc = pmix_server_notify_client_of_event(status, source, range, info, ninfo, cbfunc, cbdata); @@ -103,10 +119,6 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, pmix_globals.myid.nspace, pmix_globals.myid.rank, PMIx_Error_string(status)); - if (!pmix_globals.connected) { - return PMIX_ERR_UNREACH; - } - if (PMIX_RANGE_PROC_LOCAL != range) { /* create the msg object */ msg = PMIX_NEW(pmix_buffer_t); @@ -175,12 +187,11 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, cd->source.rank = source->rank; } cd->range = range; - - /* check for directives */ - if (NULL != info) { + if (0 < chain->ninfo) { cd->ninfo = chain->ninfo; PMIX_INFO_CREATE(cd->info, cd->ninfo); - for (n=0; n < chain->ninfo; n++) { + /* need to copy the info */ + for (n=0; n < cd->ninfo; n++) { PMIX_INFO_XFER(&cd->info[n], &chain->info[n]); if (0 == strncmp(cd->info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) { cd->nondefault = true; @@ -205,6 +216,7 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, } } } + /* add to our cache */ rbout = pmix_ring_buffer_push(&pmix_globals.notifications, cd); /* if an older event was bumped, release it */ @@ -225,7 +237,7 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, pmix_output_verbose(2, pmix_globals.debug_output, "client: notifying server %s:%d - sending", pmix_globals.myid.nspace, pmix_globals.myid.rank); - rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, notify_event_cbfunc, cb); + rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, notify_event_cbfunc, cb); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(cb); @@ -288,6 +300,7 @@ static void 
progress_local_event_hdlr(pmix_status_t status, ++cnt; } } + /* save this handler's returned status */ if (NULL != chain->evhdlr->name) { (void)strncpy(newinfo[cnt].key, chain->evhdlr->name, PMIX_MAX_KEYLEN); diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c index 4b532b79297..21fcc381301 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c @@ -16,6 +16,7 @@ #include #include +#include "src/threads/threads.h" #include "src/util/error.h" #include "src/util/output.h" @@ -67,6 +68,7 @@ PMIX_CLASS_INSTANCE(pmix_rshift_caddy_t, pmix_object_t, rscon, rsdes); +static void check_cached_events(pmix_rshift_caddy_t *cd); static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_ptl_hdr_t *hdr, pmix_buffer_t *buf, void *cbdata) @@ -107,6 +109,11 @@ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_ptl_hdr_t *hdr, if (NULL != cd && NULL != cd->evregcbfn) { cd->evregcbfn(ret, index, cd->cbdata); } + if (NULL != cd) { + /* check this event against anything in our cache */ + check_cached_events(cd); + } + /* release any info we brought along as they are * internally generated and not provided by the caller */ if (NULL!= rb->info) { @@ -148,6 +155,7 @@ static void reg_cbfunc(pmix_status_t status, void *cbdata) /* pass back our local index */ cd->evregcbfn(rc, index, cd->cbdata); } + /* release any info we brought along as they are * internally generated and not provided by the caller */ if (NULL!= rb->info) { @@ -197,7 +205,7 @@ static pmix_status_t _send_to_server(pmix_rshift_caddy_t *rcd) return rc; } } - rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, regevents_cbfunc, rcd); + rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, regevents_cbfunc, rcd); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); @@ -280,7 +288,7 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer) /* if we are a client, and we haven't already registered a handler of this * type with our server, or if we have directives, then we need to notify * the server */ - if (PMIX_PROC_SERVER != pmix_globals.proc_type && + if (!PMIX_PROC_IS_SERVER && (need_register || 0 < pmix_list_get_size(xfer))) { pmix_output_verbose(2, pmix_globals.debug_output, "pmix: _add_hdlr sending to server"); @@ -301,7 +309,7 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer) /* if we are a server and are registering for events, then we only contact * our host if we want environmental events */ - if (PMIX_PROC_SERVER == pmix_globals.proc_type && cd->enviro && + if (PMIX_PROC_IS_SERVER && cd->enviro && NULL != pmix_host_server.register_events) { pmix_output_verbose(2, pmix_globals.debug_output, "pmix: _add_hdlr registering with server"); @@ -325,24 +333,91 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer) return PMIX_SUCCESS; } +static void check_cached_events(pmix_rshift_caddy_t *cd) +{ + size_t i, n; + pmix_notify_caddy_t *ncd; + bool found, matched; + pmix_event_chain_t *chain; + + for (i=0; i < (size_t)pmix_globals.notifications.size; i++) { + if (NULL == (ncd = (pmix_notify_caddy_t*)pmix_ring_buffer_poke(&pmix_globals.notifications, i))) { + continue; + } + found = false; + if (NULL == cd->codes) { + /* they registered a default event handler - always matches */ + found = true; + } else { + for (n=0; n < cd->ncodes; n++) { + if (cd->codes[n] == ncd->status) { + 
found = true; + break; + } + } + } + if (found) { + /* if we were given specific targets, check if we are one */ + if (NULL != ncd->targets) { + matched = false; + for (n=0; n < ncd->ntargets; n++) { + if (0 != strncmp(pmix_globals.myid.nspace, ncd->targets[n].nspace, PMIX_MAX_NSLEN)) { + continue; + } + if (PMIX_RANK_WILDCARD == ncd->targets[n].rank || + pmix_globals.myid.rank == ncd->targets[n].rank) { + matched = true; + break; + } + } + if (!matched) { + /* do not notify this one */ + continue; + } + } + /* all matches - notify */ + chain = PMIX_NEW(pmix_event_chain_t); + chain->status = ncd->status; + (void)strncpy(chain->source.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); + chain->source.rank = pmix_globals.myid.rank; + /* we already left space for evhandler name plus + * a callback object when we cached the notification */ + chain->ninfo = ncd->ninfo; + PMIX_INFO_CREATE(chain->info, chain->ninfo); + if (0 < cd->ninfo) { + /* need to copy the info */ + for (n=0; n < ncd->ninfo; n++) { + PMIX_INFO_XFER(&chain->info[n], &ncd->info[n]); + if (0 == strncmp(chain->info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) { + chain->nondefault = true; + } + } + } + /* we don't want this chain to propagate, so indicate it + * should only be run as a single-shot */ + chain->endchain = true; + /* now notify any matching registered callbacks we have */ + pmix_invoke_local_event_hdlr(chain); + } + } +} + static void reg_event_hdlr(int sd, short args, void *cbdata) { pmix_rshift_caddy_t *cd = (pmix_rshift_caddy_t*)cbdata; - size_t index = 0, n, i; + size_t index = 0, n; pmix_status_t rc; pmix_event_hdlr_t *evhdlr, *ev; uint8_t location = PMIX_EVENT_ORDER_NONE; char *name = NULL, *locator = NULL; bool firstoverall=false, lastoverall=false; - bool found, matched; + bool found; pmix_list_t xfer; pmix_info_caddy_t *ixfer; void *cbobject = NULL; pmix_data_range_t range = PMIX_RANGE_UNDEF; pmix_proc_t *parray = NULL; size_t nprocs; - pmix_notify_caddy_t *ncd; - pmix_event_chain_t *chain; /* need to acquire the object from its originating thread */ PMIX_ACQUIRE_OBJECT(cd); @@ -680,63 +755,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata) } /* check if any matching notifications have been cached */ - for (i=0; i < (size_t)pmix_globals.notifications.size; i++) { - if (NULL == (ncd = (pmix_notify_caddy_t*)pmix_ring_buffer_poke(&pmix_globals.notifications, i))) { - break; - } - found = false; - if (NULL == cd->codes) { - /* they registered a default event handler - always matches */ - found = true; - } else { - for (n=0; n < cd->ncodes; n++) { - if (cd->codes[n] == ncd->status) { - found = true; - break; - } - } - } - if (found) { - /* if we were given specific targets, check if we are one */ - if (NULL != ncd->targets) { - matched = false; - for (n=0; n < ncd->ntargets; n++) { - if (0 != strncmp(pmix_globals.myid.nspace, ncd->targets[n].nspace, PMIX_MAX_NSLEN)) { - continue; - } - if (PMIX_RANK_WILDCARD == ncd->targets[n].rank || - pmix_globals.myid.rank == ncd->targets[n].rank) { - matched = true; - break; - } - } - if (!matched) { - /* do not notify this one */ - continue; - } - } - /* all matches - notify */ - chain = PMIX_NEW(pmix_event_chain_t); - chain->status = ncd->status; - (void)strncpy(chain->source.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); - chain->source.rank = pmix_globals.myid.rank; - /* we already left space for evhandler name plus - * a callback object when we cached the notification */ - chain->ninfo = ncd->ninfo; - PMIX_INFO_CREATE(chain->info, chain->ninfo); 
- if (0 < cd->ninfo) { - /* need to copy the info */ - for (n=0; n < ncd->ninfo; n++) { - PMIX_INFO_XFER(&chain->info[n], &ncd->info[n]); - } - } - /* we don't want this chain to propagate, so indicate it - * should only be run as a single-shot */ - chain->endchain = true; - /* now notify any matching registered callbacks we have */ - pmix_invoke_local_event_hdlr(chain); - } - } + check_cached_events(cd); /* all done */ PMIX_RELEASE(cd); @@ -750,6 +769,17 @@ PMIX_EXPORT void PMIx_Register_event_handler(pmix_status_t codes[], size_t ncode { pmix_rshift_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + if (NULL != cbfunc) { + cbfunc(PMIX_ERR_INIT, 0, cbdata); + } + return; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* need to thread shift this request so we can access * our global data to register this *local* event handler */ cd = PMIX_NEW(pmix_rshift_caddy_t); @@ -783,7 +813,7 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata) /* if I am not the server, then I need to notify the server * to remove my registration */ - if (PMIX_PROC_SERVER != pmix_globals.proc_type) { + if (!PMIX_PROC_IS_SERVER) { msg = PMIX_NEW(pmix_buffer_t); if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { PMIX_RELEASE(msg); @@ -928,7 +958,7 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata) report: if (NULL != msg) { /* send to the server */ - rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, NULL, NULL); + rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, NULL, NULL); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); } @@ -948,6 +978,16 @@ PMIX_EXPORT void PMIx_Deregister_event_handler(size_t event_hdlr_ref, { pmix_shift_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + if (NULL != cbfunc) { + cbfunc(PMIX_ERR_INIT, cbdata); + } + return; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* need to thread shift this request */ cd = PMIX_NEW(pmix_shift_caddy_t); cd->cbfunc.opcbfn = cbfunc; diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c index 5dfbcd4d72a..85882d3e2fa 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c @@ -40,10 +40,17 @@ #include "src/buffer_ops/types.h" #include "src/class/pmix_hash_table.h" #include "src/class/pmix_list.h" +#include "src/threads/threads.h" + +pmix_lock_t pmix_global_lock = { + .mutex = PMIX_MUTEX_STATIC_INIT, + .cond = PMIX_CONDITION_STATIC_INIT, + .active = false +}; static void cbcon(pmix_cb_t *p) { - p->active = false; + PMIX_CONSTRUCT_LOCK(&p->lock); p->checked = false; PMIX_CONSTRUCT(&p->data, pmix_buffer_t); p->cbfunc = NULL; @@ -63,6 +70,7 @@ static void cbcon(pmix_cb_t *p) } static void cbdes(pmix_cb_t *p) { + PMIX_DESTRUCT_LOCK(&p->lock); PMIX_DESTRUCT(&p->data); } PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_cb_t, @@ -220,7 +228,7 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_rank_info_t, static void scon(pmix_shift_caddy_t *p) { - p->active = false; + PMIX_CONSTRUCT_LOCK(&p->lock); p->codes = NULL; p->ncodes = 0; p->nspace = NULL; @@ -242,6 +250,7 @@ static void scon(pmix_shift_caddy_t *p) } static void scdes(pmix_shift_caddy_t *p) { + PMIX_DESTRUCT_LOCK(&p->lock); if (NULL != p->kv) { PMIX_RELEASE(p->kv); } @@ -256,6 +265,7 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_info_caddy_t, static void 
qcon(pmix_query_caddy_t *p) { + PMIX_CONSTRUCT_LOCK(&p->lock); p->queries = NULL; p->nqueries = 0; p->targets = NULL; @@ -266,9 +276,13 @@ static void qcon(pmix_query_caddy_t *p) p->cbdata = NULL; p->relcbfunc = NULL; } -PMIX_CLASS_INSTANCE(pmix_query_caddy_t, - pmix_object_t, - qcon, NULL); +static void qdes(pmix_query_caddy_t *p) +{ + PMIX_DESTRUCT_LOCK(&p->lock); +} +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_query_caddy_t, + pmix_object_t, + qcon, qdes); static void jdcon(pmix_job_data_caddy_t *p) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h index 34b0b904273..5cf9886a5f2 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h @@ -214,7 +214,7 @@ PMIX_CLASS_DECLARATION(pmix_server_caddy_t); typedef struct { pmix_object_t super; pmix_event_t ev; - volatile bool active; + pmix_lock_t lock; pmix_status_t status; pmix_query_t *queries; size_t nqueries; @@ -234,7 +234,7 @@ typedef struct { pmix_cmd_t type; pmix_proc_t *pcs; // copy of the original array of participants size_t npcs; // number of procs in the array - volatile bool active; // flag for waiting for completion + pmix_lock_t lock; // flag for waiting for completion bool def_complete; // all local procs have been registered and the trk definition is complete pmix_list_t ranks; // list of pmix_rank_info_t of the local participants pmix_list_t local_cbs; // list of pmix_server_caddy_t for sending result to the local participants @@ -271,7 +271,7 @@ PMIX_CLASS_DECLARATION(pmix_job_data_caddy_t); typedef struct { pmix_object_t super; pmix_event_t ev; - volatile bool active; + pmix_lock_t lock; pmix_status_t status; pmix_status_t *codes; size_t ncodes; @@ -305,7 +305,7 @@ PMIX_CLASS_DECLARATION(pmix_shift_caddy_t); typedef struct { pmix_list_item_t super; pmix_event_t ev; - volatile bool active; + pmix_lock_t lock; bool checked; int status; pmix_status_t pstatus; @@ -340,7 +340,6 @@ PMIX_CLASS_DECLARATION(pmix_info_caddy_t); #define PMIX_THREADSHIFT(r, c) \ do { \ - (r)->active = true; \ pmix_event_assign(&((r)->ev), pmix_globals.evbase, \ -1, EV_WRITE, (c), (r)); \ PMIX_POST_OBJECT((r)); \ @@ -356,6 +355,24 @@ PMIX_CLASS_DECLARATION(pmix_info_caddy_t); PMIX_ACQUIRE_OBJECT((a)); \ } while (0) +typedef struct { + pmix_object_t super; + pmix_event_t ev; + pmix_lock_t lock; + pmix_status_t status; + pmix_proc_t source; + pmix_data_range_t range; + pmix_proc_t *targets; + size_t ntargets; + bool nondefault; + pmix_info_t *info; + size_t ninfo; + pmix_buffer_t *buf; + pmix_op_cbfunc_t cbfunc; + void *cbdata; +} pmix_notify_caddy_t; +PMIX_CLASS_DECLARATION(pmix_notify_caddy_t); + /**** GLOBAL STORAGE ****/ /* define a global construct that includes values that must be shared @@ -384,6 +401,7 @@ typedef struct { PMIX_EXPORT extern pmix_globals_t pmix_globals; +PMIX_EXPORT extern pmix_lock_t pmix_global_lock; END_C_DECLS diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/prefetch.h b/opal/mca/pmix/pmix2x/pmix/src/include/prefetch.h index 78fc3f49aeb..09e581028e5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/prefetch.h +++ b/opal/mca/pmix/pmix2x/pmix/src/include/prefetch.h @@ -1,7 +1,7 @@ /* * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
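The pmix_event.h and ptl hunks in this patch add PMIX_POST_OBJECT() calls immediately before an object is handed to the event base, pairing with the PMIX_ACQUIRE_OBJECT() calls visible in the macros above. Their definitions are not shown here; the added comments ("ensure we post the modified peer object before another thread picks it back up") suggest they act as release/acquire-style memory barriers. The fragment below is only a hypothetical illustration of that pairing using C11 atomics; none of these names exist in PMIx.

#include <stdatomic.h>
#include <stdbool.h>

typedef struct {
    int payload;                       /* fields filled in before posting */
    atomic_bool posted;
} example_obj_t;

static void example_post(example_obj_t *obj, int value)
{
    obj->payload = value;              /* prepare the object on this thread */
    atomic_store_explicit(&obj->posted, true,
                          memory_order_release);      /* analogue of PMIX_POST_OBJECT */
}

static bool example_acquire(example_obj_t *obj, int *value_out)
{
    if (!atomic_load_explicit(&obj->posted,
                              memory_order_acquire)) { /* analogue of PMIX_ACQUIRE_OBJECT */
        return false;
    }
    *value_out = obj->payload;         /* the release/acquire pair orders the writes */
    return true;
}

The same reasoning explains why the later ptl hunks set send_ev_active/recv_ev_active and call PMIX_POST_OBJECT before pmix_event_add: once the event is added it can fire on the progress thread at any moment, so every write to the peer must be published first.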
* $COPYRIGHT$ * * Additional copyrights may follow @@ -20,6 +20,8 @@ #ifndef PMIX_PREFETCH_H #define PMIX_PREFETCH_H +#include + #if PMIX_C_HAVE_BUILTIN_EXPECT #define PMIX_LIKELY(expression) __builtin_expect(!!(expression), 1) #define PMIX_UNLIKELY(expression) __builtin_expect(!!(expression), 0) diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c index 08d794a0dc4..fbcf19cb022 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c @@ -82,9 +82,11 @@ static pmix_status_t pmix_ptl_close(void) /* ensure the listen thread has been shut down */ pmix_ptl.stop_listening(); - if (0 <= pmix_client_globals.myserver.sd) { - CLOSE_THE_SOCKET(pmix_client_globals.myserver.sd); - pmix_client_globals.myserver.sd = -1; + if (NULL != pmix_client_globals.myserver) { + if (0 <= pmix_client_globals.myserver->sd) { + CLOSE_THE_SOCKET(pmix_client_globals.myserver->sd); + pmix_client_globals.myserver->sd = -1; + } } /* the components will cleanup when closed */ @@ -105,7 +107,6 @@ static pmix_status_t pmix_ptl_open(pmix_mca_base_open_flag_t flags) PMIX_CONSTRUCT(&pmix_ptl_globals.unexpected_msgs, pmix_list_t); pmix_ptl_globals.listen_thread_active = false; PMIX_CONSTRUCT(&pmix_ptl_globals.listeners, pmix_list_t); - pmix_client_globals.myserver.sd = -1; /* Open up all available components */ return pmix_mca_base_framework_components_open(&pmix_ptl_base_framework, flags); @@ -142,6 +143,7 @@ PMIX_CLASS_INSTANCE(pmix_ptl_send_t, static void rcon(pmix_ptl_recv_t *p) { + p->peer = NULL; memset(&p->hdr, 0, sizeof(pmix_ptl_hdr_t)); p->hdr.tag = UINT32_MAX; p->hdr.nbytes = 0; @@ -150,9 +152,15 @@ static void rcon(pmix_ptl_recv_t *p) p->rdptr = NULL; p->rdbytes = 0; } +static void rdes(pmix_ptl_recv_t *p) +{ + if (NULL != p->peer) { + PMIX_RELEASE(p->peer); + } +} PMIX_CLASS_INSTANCE(pmix_ptl_recv_t, pmix_list_item_t, - rcon, NULL); + rcon, rdes); static void prcon(pmix_ptl_posted_recv_t *p) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_select.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_select.c index cee50a0325a..5e70a07ac2e 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_select.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_select.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -70,7 +70,7 @@ int pmix_ptl_base_select(void) PMIX_LIST_FOREACH(active, &pmix_ptl_globals.actives, pmix_ptl_base_active_t) { if (newactive->pri > active->pri) { pmix_list_insert_pos(&pmix_ptl_globals.actives, - (pmix_list_item_t*)active, &newactive->super); + &active->super, &newactive->super); inserted = true; break; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c index a9944d756c6..350c4d81bda 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c @@ -173,7 +173,7 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err) PMIX_DESTRUCT(&buf); /* if I called finalize, then don't generate an event */ if (!pmix_globals.mypeer->finalized) { - PMIX_REPORT_EVENT(err, &pmix_client_globals.myserver, PMIX_RANGE_LOCAL, _notify_complete); + PMIX_REPORT_EVENT(err, pmix_client_globals.myserver, PMIX_RANGE_LOCAL, _notify_complete); } } } @@ -183,6 +183,7 @@ static pmix_status_t send_msg(int sd, pmix_ptl_send_t *msg) struct iovec iov[2]; int iov_count; ssize_t remain = msg->sdbytes, rc; + iov[0].iov_base = msg->sdptr; iov[0].iov_len = msg->sdbytes; if (!msg->hdr_sent && NULL != msg->data) { @@ -297,7 +298,7 @@ static pmix_status_t read_bytes(int sd, char **buf, size_t *remain) ptr += rc; } /* we read the full data block */ -exit: + exit: *buf = ptr; return ret; } @@ -316,7 +317,8 @@ void pmix_ptl_base_send_handler(int sd, short flags, void *cbdata) PMIX_ACQUIRE_OBJECT(peer); pmix_output_verbose(2, pmix_globals.debug_output, - "ptl:base:send_handler SENDING TO PEER %s:%d tag %u with %s msg", + "%s:%d ptl:base:send_handler SENDING TO PEER %s:%d tag %u with %s msg", + pmix_globals.myid.nspace, pmix_globals.myid.rank, peer->info->nptr->nspace, peer->info->rank, (NULL == msg) ? UINT_MAX : ntohl(msg->hdr.tag), (NULL == msg) ? "NULL" : "NON-NULL"); @@ -335,14 +337,24 @@ void pmix_ptl_base_send_handler(int sd, short flags, void *cbdata) /* exit this event and let the event lib progress */ pmix_output_verbose(2, pmix_globals.debug_output, "ptl:base:send_handler RES BUSY OR WOULD BLOCK"); + /* ensure we post the modified peer object before another thread + * picks it back up */ + PMIX_POST_OBJECT(peer); return; } else { + pmix_output_verbose(5, pmix_globals.debug_output, + "%s:%d SEND ERROR %s", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + PMIx_Error_string(rc)); // report the error pmix_event_del(&peer->send_event); peer->send_ev_active = false; PMIX_RELEASE(msg); peer->send_msg = NULL; lost_connection(peer, rc); + /* ensure we post the modified peer object before another thread + * picks it back up */ + PMIX_POST_OBJECT(peer); return; } @@ -361,6 +373,9 @@ void pmix_ptl_base_send_handler(int sd, short flags, void *cbdata) pmix_event_del(&peer->send_event); peer->send_ev_active = false; } + /* ensure we post the modified peer object before another thread + * picks it back up */ + PMIX_POST_OBJECT(peer); } /* @@ -381,7 +396,8 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) PMIX_ACQUIRE_OBJECT(peer); pmix_output_verbose(2, pmix_globals.debug_output, - "ptl:base:recv:handler called with peer %s:%d", + "%s:%d ptl:base:recv:handler called with peer %s:%d", + pmix_globals.myid.nspace, pmix_globals.myid.rank, (NULL == peer) ? "NULL" : peer->info->nptr->nspace, (NULL == peer) ? 
PMIX_RANK_UNDEF : peer->info->rank); @@ -397,6 +413,7 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) pmix_output(0, "sptl:base:recv_handler: unable to allocate recv message\n"); goto err_close; } + PMIX_RETAIN(peer); peer->recv_msg->peer = peer; // provide a handle back to the peer object /* start by reading the header */ peer->recv_msg->rdptr = (char*)&peer->recv_msg->hdr; @@ -430,6 +447,11 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) peer->recv_msg->data = NULL; // make sure peer->recv_msg->rdptr = NULL; peer->recv_msg->rdbytes = 0; + /* post it for delivery */ + PMIX_ACTIVATE_POST_MSG(peer->recv_msg); + peer->recv_msg = NULL; + PMIX_POST_OBJECT(peer); + return; } else { pmix_output_verbose(2, pmix_globals.debug_output, "ptl:base:recv:handler allocate data region of size %lu", @@ -451,7 +473,8 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) * and let the caller know */ pmix_output_verbose(2, pmix_globals.debug_output, - "ptl:base:msg_recv: peer closed connection"); + "ptl:base:msg_recv: peer %s:%d closed connection", + peer->info->nptr->nspace, peer->info->rank); goto err_close; } } @@ -464,29 +487,39 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) if (PMIX_SUCCESS == (rc = read_bytes(peer->sd, &msg->rdptr, &msg->rdbytes))) { /* we recvd all of the message */ pmix_output_verbose(2, pmix_globals.debug_output, - "RECVD COMPLETE MESSAGE FROM SERVER OF %d BYTES FOR TAG %d ON PEER SOCKET %d", + "%s:%d RECVD COMPLETE MESSAGE FROM SERVER OF %d BYTES FOR TAG %d ON PEER SOCKET %d", + pmix_globals.myid.nspace, pmix_globals.myid.rank, (int)peer->recv_msg->hdr.nbytes, peer->recv_msg->hdr.tag, peer->sd); /* post it for delivery */ PMIX_ACTIVATE_POST_MSG(peer->recv_msg); peer->recv_msg = NULL; + /* ensure we post the modified peer object before another thread + * picks it back up */ + PMIX_POST_OBJECT(peer); return; } else if (PMIX_ERR_RESOURCE_BUSY == rc || PMIX_ERR_WOULD_BLOCK == rc) { /* exit this event and let the event lib progress */ + /* ensure we post the modified peer object before another thread + * picks it back up */ + PMIX_POST_OBJECT(peer); return; } else { /* the remote peer closed the connection - report that condition * and let the caller know */ pmix_output_verbose(2, pmix_globals.debug_output, - "ptl:base:msg_recv: peer closed connection"); + "%s:%d ptl:base:msg_recv: peer %s:%d closed connection", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + peer->info->nptr->nspace, peer->info->rank); goto err_close; } } /* success */ return; - err_close: + + err_close: /* stop all events */ if (peer->recv_ev_active) { pmix_event_del(&peer->recv_event); @@ -501,6 +534,9 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) peer->recv_msg = NULL; } lost_connection(peer, PMIX_ERR_UNREACH); + /* ensure we post the modified peer object before another thread + * picks it back up */ + PMIX_POST_OBJECT(peer); } void pmix_ptl_base_send(int sd, short args, void *cbdata) @@ -515,6 +551,9 @@ void pmix_ptl_base_send(int sd, short args, void *cbdata) NULL == queue->peer->info || NULL == queue->peer->info->nptr) { /* this peer has lost connection */ PMIX_RELEASE(queue); + /* ensure we post the object before another thread + * picks it back up */ + PMIX_POST_OBJECT(queue); return; } @@ -542,10 +581,12 @@ void pmix_ptl_base_send(int sd, short args, void *cbdata) } /* ensure the send event is active */ if (!(queue->peer)->send_ev_active) { - pmix_event_add(&(queue->peer)->send_event, 0); 
(queue->peer)->send_ev_active = true; + PMIX_POST_OBJECT(queue->peer); + pmix_event_add(&(queue->peer)->send_event, 0); } PMIX_RELEASE(queue); + PMIX_POST_OBJECT(snd); } void pmix_ptl_base_send_recv(int fd, short args, void *cbdata) @@ -561,6 +602,9 @@ void pmix_ptl_base_send_recv(int fd, short args, void *cbdata) if (ms->peer->sd < 0) { /* this peer's socket has been closed */ PMIX_RELEASE(ms); + /* ensure we post the object before another thread + * picks it back up */ + PMIX_POST_OBJECT(NULL); return; } @@ -577,6 +621,7 @@ void pmix_ptl_base_send_recv(int fd, short args, void *cbdata) req->tag = tag; req->cbfunc = ms->cbfunc; req->cbdata = ms->cbdata; + pmix_output_verbose(5, pmix_globals.debug_output, "posting recv on tag %d", req->tag); /* add it to the list of recvs - we cannot have unexpected messages @@ -606,11 +651,13 @@ void pmix_ptl_base_send_recv(int fd, short args, void *cbdata) } /* ensure the send event is active */ if (!ms->peer->send_ev_active) { - pmix_event_add(&ms->peer->send_event, 0); ms->peer->send_ev_active = true; + PMIX_POST_OBJECT(snd); + pmix_event_add(&ms->peer->send_event, 0); } /* cleanup */ PMIX_RELEASE(ms); + PMIX_POST_OBJECT(snd); } void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata) @@ -623,7 +670,8 @@ void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata) PMIX_ACQUIRE_OBJECT(msg); pmix_output_verbose(5, pmix_globals.debug_output, - "message received %d bytes for tag %u on socket %d", + "%s:%d message received %d bytes for tag %u on socket %d", + pmix_globals.myid.nspace, pmix_globals.myid.rank, (int)msg->hdr.nbytes, msg->hdr.tag, msg->sd); /* see if we have a waiting recv for this message */ @@ -643,7 +691,14 @@ void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata) buf.pack_ptr = ((char*)buf.base_ptr) + buf.bytes_used; } msg->data = NULL; // protect the data region + pmix_output_verbose(5, pmix_globals.debug_output, + "%s:%d EXECUTE CALLBACK for tag %u", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + msg->hdr.tag); rcv->cbfunc(msg->peer, &msg->hdr, &buf, rcv->cbdata); + pmix_output_verbose(5, pmix_globals.debug_output, + "%s:%d CALLBACK COMPLETE", + pmix_globals.myid.nspace, pmix_globals.myid.rank); PMIX_DESTRUCT(&buf); // free's the msg data } /* done with the recv if it is a dynamic tag */ @@ -668,4 +723,7 @@ void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata) /* it is possible that someone may post a recv for this message * at some point, so we have to hold onto it */ pmix_list_append(&pmix_ptl_globals.unexpected_msgs, &msg->super); + /* ensure we post the modified object before another thread + * picks it back up */ + PMIX_POST_OBJECT(msg); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h index 2deab00bda2..55e617690aa 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h @@ -247,11 +247,11 @@ PMIX_CLASS_DECLARATION(pmix_listener_t); /* add it to the queue */ \ pmix_list_append(&(p)->send_queue, &snd->super); \ } \ - PMIX_POST_OBJECT(snd); \ /* ensure the send event is active */ \ if (!(p)->send_ev_active && 0 <= (p)->sd) { \ - pmix_event_add(&(p)->send_event, 0); \ (p)->send_ev_active = true; \ + PMIX_POST_OBJECT(snd); \ + pmix_event_add(&(p)->send_event, 0); \ } \ } while (0) diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c index e58bf45ed08..8c962c0fd52 100644 --- 
a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c @@ -123,7 +123,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, /* if I am a client, then we need to look for the appropriate * connection info in the environment */ - if (PMIX_PROC_CLIENT == pmix_globals.proc_type) { + if (PMIX_PROC_IS_CLIENT) { if (NULL == (evar = getenv("PMIX_SERVER_URI2"))) { /* not us */ return PMIX_ERR_NOT_SUPPORTED; @@ -149,12 +149,12 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, } *p2 = '\0'; ++p2; - pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t); - pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(pmix_client_globals.myserver.info->nptr->nspace, p, PMIX_MAX_NSLEN); + pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); + pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t); + (void)strncpy(pmix_client_globals.myserver->info->nptr->nspace, p, PMIX_MAX_NSLEN); /* set the server rank */ - pmix_client_globals.myserver.info->rank = strtoull(p2, NULL, 10); + pmix_client_globals.myserver->info->rank = strtoull(p2, NULL, 10); /* save the URI, but do not overwrite what we may have received from * the info-key directives */ @@ -163,7 +163,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, } pmix_argv_free(uri); - } else if (PMIX_PROC_TOOL == pmix_globals.proc_type) { + } else if (PMIX_PROC_IS_TOOL) { /* if we already have a URI, then look no further */ if (NULL == mca_ptl_tcp_component.super.uri) { /* we have to discover the connection info, @@ -208,19 +208,16 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, *p2 = '\0'; ++p2; /* set the server nspace */ - pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t); - pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(pmix_client_globals.myserver.info->nptr->nspace, srvr, PMIX_MAX_NSLEN); - pmix_client_globals.myserver.info->rank = strtoull(p2, NULL, 10); + pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); + pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t); + (void)strncpy(pmix_client_globals.myserver->info->nptr->nspace, srvr, PMIX_MAX_NSLEN); + pmix_client_globals.myserver->info->rank = strtoull(p2, NULL, 10); /* now parse the uti itself */ mca_ptl_tcp_component.super.uri = strdup(p); free(srvr); } } - /* mark that we are the active module for this server */ - pmix_client_globals.myserver.compat.ptl = &pmix_ptl_tcp_module; - /* setup the path to the daemon rendezvous point */ memset(&mca_ptl_tcp_component.connection, 0, sizeof(struct sockaddr_storage)); if (0 == strncmp(mca_ptl_tcp_component.super.uri, "tcp4", 4)) { @@ -285,7 +282,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, PMIX_ERROR_LOG(rc); return rc; } - pmix_client_globals.myserver.sd = sd; + pmix_client_globals.myserver->sd = sd; /* send our identity and any authentication credentials to the server */ if (PMIX_SUCCESS != (rc = send_connect_ack(sd))) { @@ -310,21 +307,22 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, pmix_ptl_base_set_nonblocking(sd); /* setup recv event */ - pmix_event_assign(&pmix_client_globals.myserver.recv_event, + pmix_event_assign(&pmix_client_globals.myserver->recv_event, pmix_globals.evbase, - pmix_client_globals.myserver.sd, + pmix_client_globals.myserver->sd, EV_READ | EV_PERSIST, - pmix_ptl_base_recv_handler, &pmix_client_globals.myserver); - 
pmix_event_add(&pmix_client_globals.myserver.recv_event, 0); - pmix_client_globals.myserver.recv_ev_active = true; + pmix_ptl_base_recv_handler, pmix_client_globals.myserver); + pmix_client_globals.myserver->recv_ev_active = true; + PMIX_POST_OBJECT(pmix_client_globals.myserver); + pmix_event_add(&pmix_client_globals.myserver->recv_event, 0); /* setup send event */ - pmix_event_assign(&pmix_client_globals.myserver.send_event, + pmix_event_assign(&pmix_client_globals.myserver->send_event, pmix_globals.evbase, - pmix_client_globals.myserver.sd, + pmix_client_globals.myserver->sd, EV_WRITE|EV_PERSIST, - pmix_ptl_base_send_handler, &pmix_client_globals.myserver); - pmix_client_globals.myserver.send_ev_active = false; + pmix_ptl_base_send_handler, pmix_client_globals.myserver); + pmix_client_globals.myserver->send_ev_active = false; return PMIX_SUCCESS; } @@ -403,7 +401,7 @@ static pmix_status_t send_connect_ack(int sd) * local PMIx server, if known. Now use that module to * get a credential, if the security system provides one. Not * every psec module will do so, thus we must first check */ - if (PMIX_SUCCESS != (rc = pmix_psec.create_cred(&pmix_client_globals.myserver, + if (PMIX_SUCCESS != (rc = pmix_psec.create_cred(pmix_client_globals.myserver, PMIX_PROTOCOL_V2, &cred, &len))) { return rc; } @@ -551,7 +549,7 @@ static pmix_status_t recv_connect_ack(int sd) if (PMIX_PROC_IS_CLIENT) { /* see if they want us to do the handshake */ if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) { - if (PMIX_SUCCESS != (rc = pmix_psec.client_handshake(&pmix_client_globals.myserver, sd))) { + if (PMIX_SUCCESS != (rc = pmix_psec.client_handshake(pmix_client_globals.myserver, sd))) { return rc; } } else if (PMIX_SUCCESS != reply) { @@ -588,16 +586,16 @@ static pmix_status_t recv_connect_ack(int sd) pmix_globals.myid.rank = 0; /* get the server's nspace and rank so we can send to it */ - pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t); - pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t); - pmix_ptl_base_recv_blocking(sd, (char*)pmix_client_globals.myserver.info->nptr->nspace, PMIX_MAX_NSLEN+1); - pmix_ptl_base_recv_blocking(sd, (char*)&(pmix_client_globals.myserver.info->rank), sizeof(int)); + pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); + pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t); + pmix_ptl_base_recv_blocking(sd, (char*)pmix_client_globals.myserver->info->nptr->nspace, PMIX_MAX_NSLEN+1); + pmix_ptl_base_recv_blocking(sd, (char*)&(pmix_client_globals.myserver->info->rank), sizeof(int)); pmix_output_verbose(2, pmix_globals.debug_output, "pmix: RECV CONNECT CONFIRMATION FOR TOOL %s:%d FROM SERVER %s:%d", pmix_globals.myid.nspace, pmix_globals.myid.rank, - pmix_client_globals.myserver.info->nptr->nspace, - pmix_client_globals.myserver.info->rank); + pmix_client_globals.myserver->info->nptr->nspace, + pmix_client_globals.myserver->info->rank); /* get the returned status from the security handshake */ pmix_ptl_base_recv_blocking(sd, (char*)&reply, sizeof(pmix_status_t)); @@ -607,7 +605,7 @@ static pmix_status_t recv_connect_ack(int sd) if (NULL == pmix_psec.client_handshake) { return PMIX_ERR_HANDSHAKE_FAILED; } - if (PMIX_SUCCESS != (reply = pmix_psec.client_handshake(&pmix_client_globals.myserver, sd))) { + if (PMIX_SUCCESS != (reply = pmix_psec.client_handshake(pmix_client_globals.myserver, sd))) { return reply; } /* if the handshake succeeded, then fall thru to the next step */ diff --git 
a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c index a3c9006d7c3..0a090bb51de 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c @@ -116,12 +116,12 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, } /* set the server nspace */ - pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t); - pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(pmix_client_globals.myserver.info->nptr->nspace, uri[0], PMIX_MAX_NSLEN); + pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); + pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t); + (void)strncpy(pmix_client_globals.myserver->info->nptr->nspace, uri[0], PMIX_MAX_NSLEN); /* set the server rank */ - pmix_client_globals.myserver.info->rank = strtoull(uri[1], NULL, 10); + pmix_client_globals.myserver->info->rank = strtoull(uri[1], NULL, 10); /* setup the path to the daemon rendezvous point */ memset(&mca_ptl_usock_component.connection, 0, sizeof(struct sockaddr_storage)); @@ -141,7 +141,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, PMIX_ERROR_LOG(rc); return rc; } - pmix_client_globals.myserver.sd = sd; + pmix_client_globals.myserver->sd = sd; /* send our identity and any authentication credentials to the server */ if (PMIX_SUCCESS != (rc = send_connect_ack(sd))) { @@ -164,21 +164,21 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, pmix_ptl_base_set_nonblocking(sd); /* setup recv event */ - pmix_event_assign(&pmix_client_globals.myserver.recv_event, + pmix_event_assign(&pmix_client_globals.myserver->recv_event, pmix_globals.evbase, - pmix_client_globals.myserver.sd, + pmix_client_globals.myserver->sd, EV_READ | EV_PERSIST, pmix_ptl_base_recv_handler, &pmix_client_globals.myserver); - pmix_event_add(&pmix_client_globals.myserver.recv_event, 0); - pmix_client_globals.myserver.recv_ev_active = true; + pmix_event_add(&pmix_client_globals.myserver->recv_event, 0); + pmix_client_globals.myserver->recv_ev_active = true; /* setup send event */ - pmix_event_assign(&pmix_client_globals.myserver.send_event, + pmix_event_assign(&pmix_client_globals.myserver->send_event, pmix_globals.evbase, - pmix_client_globals.myserver.sd, + pmix_client_globals.myserver->sd, EV_WRITE|EV_PERSIST, pmix_ptl_base_send_handler, &pmix_client_globals.myserver); - pmix_client_globals.myserver.send_ev_active = false; + pmix_client_globals.myserver->send_ev_active = false; return PMIX_SUCCESS; } @@ -244,7 +244,7 @@ static pmix_status_t send_connect_ack(int sd) /* get a credential, if the security system provides one. 
Not * every SPC will do so, thus we must first check */ - if (PMIX_SUCCESS != (rc = pmix_psec.create_cred(&pmix_client_globals.myserver, + if (PMIX_SUCCESS != (rc = pmix_psec.create_cred(pmix_client_globals.myserver, PMIX_PROTOCOL_V1, &cred, &len))) { return rc; } @@ -331,7 +331,7 @@ static pmix_status_t recv_connect_ack(int sd) /* see if they want us to do the handshake */ if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) { - if (PMIX_SUCCESS != (rc = pmix_psec.client_handshake(&pmix_client_globals.myserver, sd))) { + if (PMIX_SUCCESS != (rc = pmix_psec.client_handshake(pmix_client_globals.myserver, sd))) { return rc; } } else if (PMIX_SUCCESS != reply) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c index efa32eaa6b3..df0af87c280 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c @@ -106,7 +106,6 @@ static void* progress_engine(pmix_object_t *obj) pmix_progress_tracker_t *trk = (pmix_progress_tracker_t*)t->t_arg; while (trk->ev_active) { - pmix_event_loop(trk->ev_base, PMIX_EVLOOP_ONCE); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c index 15d08d54db3..94bc36c4fe1 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c @@ -141,9 +141,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, NULL }; - if (0 < pmix_globals.init_cntr) { - return PMIX_SUCCESS; - } + PMIX_ACQUIRE_THREAD(&pmix_global_lock); pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server init called"); @@ -152,15 +150,18 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, * opens and initializes the required frameworks */ if (PMIX_SUCCESS != (rc = pmix_rte_init(PMIX_PROC_SERVER, info, ninfo, NULL))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } if (0 != (rc = initialize_server_base(module))) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) if (PMIX_SUCCESS != (rc = pmix_dstore_init(info, ninfo))) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } #endif /* PMIX_ENABLE_DSTORE */ @@ -174,7 +175,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, if (PMIX_SUCCESS != pmix_ptl_base_start_listening(info, ninfo)) { pmix_show_help("help-pmix-server.txt", "listener-thread-start", true); - PMIx_server_finalize(); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } @@ -204,7 +205,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, kv.key = NULL; kv.value = NULL; PMIX_DESTRUCT(&kv); - PMIx_server_finalize(); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } } @@ -220,7 +221,14 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, /* get our available ptl modules */ ptl_mode = pmix_ptl.get_available_modules(); + /* just in case, assign our own default modules */ + if (PMIX_SUCCESS != (rc = pmix_psec.assign_module(pmix_globals.mypeer, NULL))) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + ++pmix_globals.init_cntr; + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; } @@ -230,11 +238,19 @@ PMIX_EXPORT pmix_status_t PMIx_server_finalize(void) int i; pmix_peer_t *peer; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) 
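PMIx_server_init() above and the server entry points that follow all share one guard built on the new pmix_global_lock: take the lock, verify init_cntr, and drop the lock before doing any real work. Condensed into a single helper for clarity; check_initialized is an illustrative name, not part of the patch:

    static pmix_status_t check_initialized(void)
    {
        PMIX_ACQUIRE_THREAD(&pmix_global_lock);
        if (pmix_globals.init_cntr <= 0) {
            PMIX_RELEASE_THREAD(&pmix_global_lock);
            return PMIX_ERR_INIT;   /* library not (or no longer) initialized */
        }
        PMIX_RELEASE_THREAD(&pmix_global_lock);
        return PMIX_SUCCESS;
    }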
{ + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + if (1 != pmix_globals.init_cntr) { --pmix_globals.init_cntr; + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; } pmix_globals.init_cntr = 0; + PMIX_RELEASE_THREAD(&pmix_global_lock); pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server finalize called"); @@ -500,6 +516,13 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_nspace(const char nspace[], int n { pmix_setup_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + cd = PMIX_NEW(pmix_setup_caddy_t); (void)strncpy(cd->proc.nspace, nspace, PMIX_MAX_NSLEN); cd->nlocalprocs = nlocalprocs; @@ -562,6 +585,16 @@ PMIX_EXPORT void PMIx_server_deregister_nspace(const char nspace[], "pmix:server deregister nspace %s", nspace); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + if (NULL != cbfunc) { + cbfunc(PMIX_ERR_INIT, cbdata); + } + return; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + cd = PMIX_NEW(pmix_setup_caddy_t); (void)strncpy(cd->proc.nspace, nspace, PMIX_MAX_NSLEN); cd->opcbfunc = cbfunc; @@ -780,6 +813,13 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_client(const pmix_proc_t *proc, { pmix_setup_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server register client %s:%d", proc->nspace, proc->rank); @@ -844,6 +884,16 @@ PMIX_EXPORT void PMIx_server_deregister_client(const pmix_proc_t *proc, { pmix_setup_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + if (NULL != cbfunc) { + cbfunc(PMIX_ERR_INIT, cbdata); + } + return; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server deregister client %s:%d", proc->nspace, proc->rank); @@ -866,6 +916,13 @@ PMIX_EXPORT pmix_status_t PMIx_server_setup_fork(const pmix_proc_t *proc, char * pmix_listener_t *lt; pmix_status_t rc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server setup_fork for nspace %s rank %d", proc->nspace, proc->rank); @@ -944,7 +1001,7 @@ static void _dmodex_req(int sd, short args, void *cbdata) PMIX_RETAIN(cd); dcd->cd = cd; pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super); - cd->active = false; // ensure the request doesn't hang + PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang return; } @@ -956,8 +1013,7 @@ static void _dmodex_req(int sd, short args, void *cbdata) /* execute the callback */ cd->cbfunc(PMIX_SUCCESS, data, sz, cd->cbdata); - cd->active = false; - + PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang return; } @@ -976,7 +1032,7 @@ static void _dmodex_req(int sd, short args, void *cbdata) PMIX_RETAIN(cd); dcd->cd = cd; pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super); - cd->active = false; // ensure the request doesn't hang + PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang return; } @@ -989,7 +1045,7 @@ static 
void _dmodex_req(int sd, short args, void *cbdata) PMIX_RETAIN(cd); dcd->cd = cd; pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super); - cd->active = false; // ensure the request doesn't hang + PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang return; } @@ -1012,7 +1068,7 @@ static void _dmodex_req(int sd, short args, void *cbdata) if (NULL != data) { free(data); } - cd->active = false; + PMIX_WAKEUP_THREAD(&cd->lock); } PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc, @@ -1021,6 +1077,13 @@ PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc, { pmix_setup_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* protect against bozo */ if (NULL == cbfunc || NULL == proc) { return PMIX_ERR_BAD_PARAM; @@ -1040,7 +1103,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc, * potential threading issues */ PMIX_THREADSHIFT(cd, _dmodex_req); - PMIX_WAIT_FOR_COMPLETION(cd->active); + PMIX_WAIT_THREAD(&cd->lock); PMIX_RELEASE(cd); return PMIX_SUCCESS; } @@ -1065,7 +1128,9 @@ static void _store_internal(int sd, short args, void *cbdata) } else { cd->status = pmix_hash_store(&ns->internal, cd->rank, cd->kv); } - cd->active = false; + if (cd->lock.active) { + PMIX_WAKEUP_THREAD(&cd->lock); + } } PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, @@ -1074,6 +1139,13 @@ PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, pmix_shift_caddy_t *cd; pmix_status_t rc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* setup to thread shift this request */ cd = PMIX_NEW(pmix_shift_caddy_t); cd->nspace = proc->nspace; @@ -1089,12 +1161,8 @@ PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, return rc; } - if (PMIX_PROC_SERVER == pmix_globals.proc_type) { - PMIX_THREADSHIFT(cd, _store_internal); - PMIX_WAIT_FOR_COMPLETION(cd->active); - } else { - _store_internal(0, 0, cd); - } + PMIX_THREADSHIFT(cd, _store_internal); + PMIX_WAIT_THREAD(&cd->lock); rc = cd->status; PMIX_RELEASE(cd); @@ -1116,6 +1184,13 @@ PMIX_EXPORT pmix_status_t PMIx_generate_regex(const char *input, char **regexp) char **regexargs = NULL, *tmp, *tmp2; char *cptr; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* define the default */ *regexp = NULL; @@ -1341,6 +1416,13 @@ PMIX_EXPORT pmix_status_t PMIx_generate_ppn(const char *input, char **regexp) char *tmp, *tmp2; char *cptr; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* define the default */ *regexp = NULL; @@ -1523,6 +1605,13 @@ pmix_status_t PMIx_server_setup_application(const char nspace[], { pmix_setup_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* need to threadshift this request */ cd = PMIX_NEW(pmix_setup_caddy_t); if (NULL == cd) { @@ -1567,6 +1656,13 @@ pmix_status_t 
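The _dmodex_req and PMIx_Store_internal hunks replace the old volatile-bool spin (PMIX_WAIT_FOR_COMPLETION) with a sleep on the caddy's new pmix_lock_t: the caller thread-shifts the request and blocks in PMIX_WAIT_THREAD, and the handler running in the progress thread finishes with PMIX_WAKEUP_THREAD. A compact sketch of that round trip; do_work and blocking_request are illustrative names, and the lock itself is constructed by the caddy constructor shown further below:

    static void do_work(int sd, short args, void *cbdata)
    {
        pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata;
        /* ... service the request in the progress thread ... */
        PMIX_WAKEUP_THREAD(&cd->lock);   /* wakes the blocked caller below */
    }

    static pmix_status_t blocking_request(void)
    {
        pmix_setup_caddy_t *cd = PMIX_NEW(pmix_setup_caddy_t); /* ctor runs PMIX_CONSTRUCT_LOCK */
        PMIX_THREADSHIFT(cd, do_work);   /* push the work into pmix_globals.evbase */
        PMIX_WAIT_THREAD(&cd->lock);     /* condvar sleep until WAKEUP, no spinning */
        PMIX_RELEASE(cd);
        return PMIX_SUCCESS;
    }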
PMIx_server_setup_local_support(const char nspace[], { pmix_setup_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* need to threadshift this request */ cd = PMIX_NEW(pmix_setup_caddy_t); if (NULL == cd) { @@ -1636,7 +1732,7 @@ static void _spcb(int sd, short args, void *cbdata) if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &cd->status, 1, PMIX_STATUS))) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(cd->cd); - cd->active = false; + PMIX_WAKEUP_THREAD(&cd->lock); return; } if (PMIX_SUCCESS == cd->status) { @@ -1666,7 +1762,7 @@ static void _spcb(int sd, short args, void *cbdata) PMIX_SERVER_QUEUE_REPLY(cd->cd->peer, cd->cd->hdr.tag, reply); /* cleanup */ PMIX_RELEASE(cd->cd); - cd->active = false; + PMIX_WAKEUP_THREAD(&cd->lock); } static void spawn_cbfunc(pmix_status_t status, char *nspace, void *cbdata) @@ -1680,7 +1776,7 @@ static void spawn_cbfunc(pmix_status_t status, char *nspace, void *cbdata) cd->cd = (pmix_server_caddy_t*)cbdata;; PMIX_THREADSHIFT(cd, _spcb); - PMIX_WAIT_FOR_COMPLETION(cd->active); + PMIX_WAIT_THREAD(&cd->lock); PMIX_RELEASE(cd); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c index 97fdd7cdfe9..5826c4b8870 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c @@ -1675,7 +1675,7 @@ static void tcon(pmix_server_trkr_t *t) { t->pcs = NULL; t->npcs = 0; - t->active = true; + PMIX_CONSTRUCT_LOCK(&t->lock); t->def_complete = false; PMIX_CONSTRUCT(&t->ranks, pmix_list_t); PMIX_CONSTRUCT(&t->local_cbs, pmix_list_t); @@ -1690,6 +1690,7 @@ static void tcon(pmix_server_trkr_t *t) } static void tdes(pmix_server_trkr_t *t) { + PMIX_DESTRUCT_LOCK(&t->lock); if (NULL != t->pcs) { free(t->pcs); } @@ -1725,7 +1726,7 @@ PMIX_CLASS_INSTANCE(pmix_snd_caddy_t, static void scadcon(pmix_setup_caddy_t *p) { memset(&p->proc, 0, sizeof(pmix_proc_t)); - p->active = true; + PMIX_CONSTRUCT_LOCK(&p->lock); p->nspace = NULL; p->server_object = NULL; p->nlocalprocs = 0; @@ -1738,6 +1739,7 @@ static void scadcon(pmix_setup_caddy_t *p) } static void scaddes(pmix_setup_caddy_t *p) { + PMIX_DESTRUCT_LOCK(&p->lock); } PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_setup_caddy_t, pmix_object_t, @@ -1745,7 +1747,7 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_setup_caddy_t, static void ncon(pmix_notify_caddy_t *p) { - p->active = true; + PMIX_CONSTRUCT_LOCK(&p->lock); memset(p->source.nspace, 0, PMIX_MAX_NSLEN+1); p->source.rank = PMIX_RANK_UNDEF; p->range = PMIX_RANGE_UNDEF; @@ -1758,6 +1760,7 @@ static void ncon(pmix_notify_caddy_t *p) } static void ndes(pmix_notify_caddy_t *p) { + PMIX_DESTRUCT_LOCK(&p->lock); if (NULL != p->info) { PMIX_INFO_FREE(p->info, p->ninfo); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h index f978e058b33..dac731d2242 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h @@ -19,6 +19,7 @@ #include #include #include +#include "src/threads/threads.h" #include "src/util/hash.h" typedef struct { @@ -31,7 +32,7 @@ PMIX_CLASS_DECLARATION(pmix_trkr_caddy_t); typedef struct { pmix_object_t super; pmix_event_t ev; - volatile bool active; + pmix_lock_t lock; char *nspace; pmix_status_t status; pmix_proc_t proc; @@ -48,24 +49,6 @@ typedef struct { } 
pmix_setup_caddy_t; PMIX_CLASS_DECLARATION(pmix_setup_caddy_t); -typedef struct { - pmix_object_t super; - pmix_event_t ev; - volatile bool active; - pmix_status_t status; - pmix_proc_t source; - pmix_data_range_t range; - pmix_proc_t *targets; - size_t ntargets; - bool nondefault; - pmix_info_t *info; - size_t ninfo; - pmix_buffer_t *buf; - pmix_op_cbfunc_t cbfunc; - void *cbdata; -} pmix_notify_caddy_t; -PMIX_CLASS_DECLARATION(pmix_notify_caddy_t); - typedef struct { pmix_list_item_t super; pmix_setup_caddy_t *cd; diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/threads/Makefile.include index ba93edb67ab..d0d41f1c577 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/threads/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/Makefile.include @@ -25,7 +25,6 @@ # Source code files headers += \ - threads/condition.h \ threads/mutex.h \ threads/mutex_unix.h \ threads/threads.h \ @@ -34,7 +33,6 @@ headers += \ threads/thread_usage.h libpmix_la_SOURCES += \ - threads/condition.c \ threads/mutex.c \ threads/thread.c \ threads/wait_sync.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/condition.c b/opal/mca/pmix/pmix2x/pmix/src/threads/condition.c deleted file mode 100644 index 13a9d3ab164..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/threads/condition.c +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "pmix_config.h" - -#include "src/threads/condition.h" - - -static void pmix_condition_construct(pmix_condition_t *c) -{ - c->c_waiting = 0; - c->c_signaled = 0; -} - - -static void pmix_condition_destruct(pmix_condition_t *c) -{ -} - -PMIX_CLASS_INSTANCE(pmix_condition_t, - pmix_object_t, - pmix_condition_construct, - pmix_condition_destruct); diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/condition.h b/opal/mca/pmix/pmix2x/pmix/src/threads/condition.h deleted file mode 100644 index 7a18660d8f2..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/threads/condition.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#ifndef PMIX_CONDITION_SPINLOCK_H -#define PMIX_CONDITION_SPINLOCK_H - -#include "pmix_config.h" -#ifdef HAVE_SYS_TIME_H -#include -#endif -#include -#include - -#include "src/threads/mutex.h" - -BEGIN_C_DECLS - -struct pmix_condition_t { - pmix_object_t super; - volatile int c_waiting; - volatile int c_signaled; -}; -typedef struct pmix_condition_t pmix_condition_t; - -PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_condition_t); - - -static inline int pmix_condition_wait(pmix_condition_t *c, pmix_mutex_t *m) -{ - int rc = 0; - c->c_waiting++; - - if (c->c_signaled) { - c->c_waiting--; - return 0; - } - - c->c_signaled--; - c->c_waiting--; - return rc; -} - -static inline int pmix_condition_signal(pmix_condition_t *c) -{ - if (c->c_waiting) { - c->c_signaled++; - } - return 0; -} - -static inline int pmix_condition_broadcast(pmix_condition_t *c) -{ - c->c_signaled = c->c_waiting; - return 0; -} - -END_C_DECLS - -#endif diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/threads.h b/opal/mca/pmix/pmix2x/pmix/src/threads/threads.h index cee5517fa17..d66e594ead6 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/threads/threads.h +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/threads.h @@ -35,7 +35,6 @@ #endif #include "mutex.h" -#include "condition.h" BEGIN_C_DECLS @@ -59,61 +58,125 @@ PMIX_EXPORT extern bool pmix_debug_threads; PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_thread_t); +#define pmix_condition_wait(a,b) pthread_cond_wait(a, &(b)->m_lock_pthread) +typedef pthread_cond_t pmix_condition_t; +#define pmix_condition_broadcast(a) pthread_cond_broadcast(a) +#define pmix_condition_signal(a) pthread_cond_signal(a) +#define PMIX_CONDITION_STATIC_INIT PTHREAD_COND_INITIALIZER + +typedef struct { + pmix_mutex_t mutex; + pmix_condition_t cond; + volatile bool active; +} pmix_lock_t; + +#define PMIX_CONSTRUCT_LOCK(l) \ + do { \ + PMIX_CONSTRUCT(&(l)->mutex, pmix_mutex_t); \ + pthread_cond_init(&(l)->cond, NULL); \ + (l)->active = true; \ + } while(0) + +#define PMIX_DESTRUCT_LOCK(l) \ + do { \ + PMIX_DESTRUCT(&(l)->mutex); \ + pthread_cond_destroy(&(l)->cond); \ + } while(0) + + #if PMIX_ENABLE_DEBUG -#define PMIX_ACQUIRE_THREAD(lck, cnd, act) \ - do { \ - PMIX_THREAD_LOCK((lck)); \ - if (pmix_debug_threads) { \ - pmix_output(0, "Waiting for thread %s:%d", \ - __FILE__, __LINE__); \ - } \ - while (*(act)) { \ - pmix_condition_wait((cnd), (lck)); \ - } \ - if (pmix_debug_threads) { \ - pmix_output(0, "Thread obtained %s:%d", \ - __FILE__, __LINE__); \ - } \ - *(act) = true; \ - } while(0); +#define PMIX_ACQUIRE_THREAD(lck) \ + do { \ + pmix_mutex_lock(&(lck)->mutex); \ + if (pmix_debug_threads) { \ + pmix_output(0, "Waiting for thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + while ((lck)->active) { \ + pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + if (pmix_debug_threads) { \ + pmix_output(0, "Thread obtained %s:%d", \ + __FILE__, __LINE__); \ + } \ + PMIX_ACQUIRE_OBJECT(lck); \ + (lck)->active = true; \ + } while(0) #else -#define PMIX_ACQUIRE_THREAD(lck, cnd, act) \ - do { \ - PMIX_THREAD_LOCK((lck)); \ - while (*(act)) { \ - pmix_condition_wait((cnd), (lck)); \ - } \ - *(act) = true; \ - } while(0); +#define PMIX_ACQUIRE_THREAD(lck) \ + do { \ + pmix_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + PMIX_ACQUIRE_OBJECT(lck); \ + (lck)->active = true; \ + } while(0) #endif #if PMIX_ENABLE_DEBUG -#define PMIX_RELEASE_THREAD(lck, cnd, act) \ +#define 
PMIX_WAIT_THREAD(lck) \ + do { \ + pmix_mutex_lock(&(lck)->mutex); \ + if (pmix_debug_threads) { \ + pmix_output(0, "Waiting for thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + while ((lck)->active) { \ + pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + if (pmix_debug_threads) { \ + pmix_output(0, "Thread obtained %s:%d", \ + __FILE__, __LINE__); \ + } \ + PMIX_ACQUIRE_OBJECT(lck); \ + pmix_mutex_unlock(&(lck)->mutex); \ + } while(0) +#else +#define PMIX_WAIT_THREAD(lck) \ + do { \ + pmix_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + PMIX_ACQUIRE_OBJECT(lck); \ + pmix_mutex_unlock(&(lck)->mutex); \ + } while(0) +#endif + + +#if PMIX_ENABLE_DEBUG +#define PMIX_RELEASE_THREAD(lck) \ do { \ if (pmix_debug_threads) { \ pmix_output(0, "Releasing thread %s:%d", \ __FILE__, __LINE__); \ } \ - *(act) = false; \ - pmix_condition_broadcast((cnd)); \ - PMIX_THREAD_UNLOCK((lck)); \ - } while(0); + (lck)->active = false; \ + PMIX_POST_OBJECT(lck); \ + pmix_condition_broadcast(&(lck)->cond); \ + pmix_mutex_unlock(&(lck)->mutex); \ + } while(0) #else -#define PMIX_RELEASE_THREAD(lck, cnd, act) \ - do { \ - *(act) = false; \ - pmix_condition_broadcast((cnd)); \ - PMIX_THREAD_UNLOCK((lck)); \ - } while(0); +#define PMIX_RELEASE_THREAD(lck) \ + do { \ + (lck)->active = false; \ + PMIX_POST_OBJECT(lck); \ + pmix_condition_broadcast(&(lck)->cond); \ + pmix_mutex_unlock(&(lck)->mutex); \ + } while(0) #endif -#define PMIX_WAKEUP_THREAD(cnd, act) \ - do { \ - *(act) = false; \ - pmix_condition_broadcast((cnd)); \ - } while(0); +#define PMIX_WAKEUP_THREAD(lck) \ + do { \ + pmix_mutex_lock(&(lck)->mutex); \ + (lck)->active = false; \ + PMIX_POST_OBJECT(lck); \ + pmix_condition_broadcast(&(lck)->cond); \ + pmix_mutex_unlock(&(lck)->mutex); \ + } while(0) /* provide a macro for forward-proofing the shifting diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h b/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h index 50717a96d7e..4430912606d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h @@ -19,8 +19,9 @@ #if !defined(PMIX_THREADS_WAIT_SYNC_H) #define PMIX_THREADS_WAIT_SYNC_H +#include "src/include/prefetch.h" #include "src/atomics/sys/atomic.h" -#include "src/threads/condition.h" +#include "src/threads/threads.h" #include "src/util/error.h" #include diff --git a/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c b/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c index 0f4dba4445a..196938a62bb 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c +++ b/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c @@ -158,18 +158,12 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, pmix_nspace_t *nptr, *nsptr; char hostname[PMIX_MAX_NSLEN]; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (NULL == proc) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_BAD_PARAM; } - - /* if we were given an nspace in the environment, then we - * must have been spawned by a PMIx server - so even though - * we technically will operate as a tool, we are actually - * a "client" of the PMIx server and should connect that way */ - if (NULL != getenv("PMIX_NAMESPACE")) { - return PMIx_Init(proc, info, ninfo); - } - if (0 < pmix_globals.init_cntr) { /* since we have been called before, the nspace and * rank should be known. 
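The removed condition.h above implemented pmix_condition_wait() as a non-blocking counter check, which forced callers to spin; the replacement pmix_lock_t is a real pthread mutex/condvar pair whose active flag starts true and is cleared by the RELEASE and WAKEUP macros. The semantics, reduced to a self-contained plain-pthreads program (demo_lock_t and worker are illustrative, not PMIx API; build with -pthread):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    typedef struct {
        pthread_mutex_t mutex;
        pthread_cond_t  cond;
        volatile bool   active;   /* true until the work is signaled complete */
    } demo_lock_t;

    static demo_lock_t lk = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, true };

    static void *worker(void *arg)
    {
        (void)arg;
        /* equivalent of PMIX_WAKEUP_THREAD(&lk) */
        pthread_mutex_lock(&lk.mutex);
        lk.active = false;
        pthread_cond_broadcast(&lk.cond);
        pthread_mutex_unlock(&lk.mutex);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        pthread_create(&t, NULL, worker, NULL);
        /* equivalent of PMIX_WAIT_THREAD(&lk): sleep on the condvar, don't spin */
        pthread_mutex_lock(&lk.mutex);
        while (lk.active) {
            pthread_cond_wait(&lk.cond, &lk.mutex);
        }
        pthread_mutex_unlock(&lk.mutex);
        pthread_join(t, NULL);
        puts("woken");
        return 0;
    }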
So return them here if @@ -179,19 +173,30 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, proc->rank = pmix_globals.myid.rank; } ++pmix_globals.init_cntr; + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; } + /* if we were given an nspace in the environment, then we + * must have been spawned by a PMIx server - so even though + * we technically will operate as a tool, we are actually + * a "client" of the PMIx server and should connect that way */ + if (NULL != getenv("PMIX_NAMESPACE")) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIx_Init(proc, info, ninfo); + } + /* setup the runtime - this init's the globals, * opens and initializes the required frameworks */ if (PMIX_SUCCESS != (rc = pmix_rte_init(PMIX_PROC_TOOL, info, ninfo, pmix_tool_notify_recv))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t); - PMIX_CONSTRUCT(&pmix_client_globals.myserver, pmix_peer_t); + pmix_client_globals.myserver = PMIX_NEW(pmix_peer_t); pmix_output_verbose(2, pmix_globals.debug_output, "pmix: init called"); @@ -199,13 +204,15 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, /* select our psec module - we take the default as we cannot * do any better */ if (PMIX_SUCCESS != (rc = pmix_psec.assign_module(pmix_globals.mypeer, NULL))) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* the server will have to use the same */ - pmix_client_globals.myserver.compat.psec = pmix_globals.mypeer->compat.psec; + pmix_client_globals.myserver->compat.psec = pmix_globals.mypeer->compat.psec; /* connect to the server - returns job info if successful */ - if (PMIX_SUCCESS != (rc = pmix_ptl.connect_to_peer(&pmix_client_globals.myserver, info, ninfo))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.connect_to_peer(pmix_client_globals.myserver, info, ninfo))){ + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } @@ -228,6 +235,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, } } if (NULL == nsptr) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_NOT_FOUND; } @@ -239,6 +247,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.string = strdup(nsptr->nspace); if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -251,6 +260,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.integer = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -263,6 +273,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -275,6 +286,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 1; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -287,6 +299,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.string = strdup("0"); if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, 
pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -299,7 +312,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); - return rc; + PMIX_RELEASE_THREAD(&pmix_global_lock); } PMIX_RELEASE(kptr); // maintain accounting @@ -311,6 +324,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 1; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -323,6 +337,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 1; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -335,6 +350,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 1; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -348,6 +364,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 1; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -360,6 +377,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -372,6 +390,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -384,6 +403,8 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); + return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -395,6 +416,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -407,6 +429,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -425,6 +448,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.string = strdup(hostname); if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return 
rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -442,6 +466,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.string = strdup(hostname); if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -455,24 +480,49 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.string = strdup("0"); if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } -/* callback for wait completion */ -static void wait_cbfunc(struct pmix_peer_t *pr, - pmix_ptl_hdr_t *hdr, - pmix_buffer_t *buf, void *cbdata) +typedef struct { + pmix_lock_t lock; + pmix_event_t ev; + bool active; +} pmix_tool_timeout_t; + +/* timer callback */ +static void fin_timeout(int sd, short args, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + pmix_tool_timeout_t *tev; + tev = (pmix_tool_timeout_t*)cbdata; pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:tool wait_cbfunc received"); + "pmix:tool finwait timeout fired"); + if (tev->active) { + tev->active = false; + PMIX_WAKEUP_THREAD(&tev->lock); + } +} +/* callback for finalize completion */ +static void finwait_cbfunc(struct pmix_peer_t *pr, + pmix_ptl_hdr_t *hdr, + pmix_buffer_t *buf, void *cbdata) +{ + pmix_tool_timeout_t *tev; + tev = (pmix_tool_timeout_t*)cbdata; - *active = false; + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix:tool finwait_cbfunc received"); + if (tev->active) { + tev->active = false; + pmix_event_del(&tev->ev); // stop the timer + PMIX_WAKEUP_THREAD(&tev->lock); + } } PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) @@ -480,13 +530,17 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) pmix_buffer_t *msg; pmix_cmd_t cmd = PMIX_FINALIZE_CMD; pmix_status_t rc; - volatile bool active; + pmix_tool_timeout_t tev; + struct timeval tv = {2, 0}; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (1 != pmix_globals.init_cntr) { --pmix_globals.init_cntr; + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; } pmix_globals.init_cntr = 0; + PMIX_RELEASE_THREAD(&pmix_global_lock); pmix_output_verbose(2, pmix_globals.debug_output, "pmix:tool finalize called"); @@ -505,15 +559,25 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) pmix_output_verbose(2, pmix_globals.debug_output, "pmix:tool sending finalize sync to server"); - /* send to the server */ - active = true;; - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, - wait_cbfunc, (void*)&active))){ + /* setup a timer to protect ourselves should the server be unable + * to answer for some reason */ + PMIX_CONSTRUCT_LOCK(&tev.lock); + pmix_event_assign(&tev.ev, pmix_globals.evbase, -1, 0, + fin_timeout, &tev); + tev.active = true; + PMIX_POST_OBJECT(&tev); + pmix_event_add(&tev.ev, &tv); + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, + finwait_cbfunc, (void*)&tev))){ return rc; } /* wait for the ack to return */ - PMIX_WAIT_FOR_COMPLETION(active); + PMIX_WAIT_THREAD(&tev.lock); + PMIX_DESTRUCT_LOCK(&tev.lock); + if (tev.active) { + pmix_event_del(&tev.ev); + } pmix_output_verbose(2, pmix_globals.debug_output, "pmix:tool finalize sync received"); @@ -525,7 +589,7 @@ PMIX_EXPORT pmix_status_t 
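The tool finalize path above pairs the blocking wait with a one-shot 2-second libevent timer so a dead or unresponsive server cannot hang the tool: whichever of finwait_cbfunc (the reply) or fin_timeout fires first clears tev.active and wakes the lock. Condensed fragment, assuming the pmix_tool_timeout_t type and the two callbacks introduced in this hunk:

    pmix_tool_timeout_t tev;
    struct timeval tv = {2, 0};

    PMIX_CONSTRUCT_LOCK(&tev.lock);
    pmix_event_assign(&tev.ev, pmix_globals.evbase, -1, 0, fin_timeout, &tev);
    tev.active = true;
    PMIX_POST_OBJECT(&tev);
    pmix_event_add(&tev.ev, &tv);      /* arm the safety timer */
    /* ... pmix_ptl.send_recv(..., finwait_cbfunc, &tev) sends the finalize sync ... */
    PMIX_WAIT_THREAD(&tev.lock);       /* reply or timeout wakes us */
    PMIX_DESTRUCT_LOCK(&tev.lock);
    if (tev.active) {                  /* defensive: stop the timer if still armed */
        pmix_event_del(&tev.ev);
    }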
PMIx_tool_finalize(void) (void)pmix_progress_thread_pause(NULL); } - PMIX_DESTRUCT(&pmix_client_globals.myserver); + PMIX_RELEASE(pmix_client_globals.myserver); PMIX_LIST_DESTRUCT(&pmix_client_globals.pending_requests); /* shutdown services */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/hash.c b/opal/mca/pmix/pmix2x/pmix/src/util/hash.c index ba479ab3351..d76a45ac4a3 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/hash.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/hash.c @@ -6,7 +6,7 @@ * reserved. * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/output.c b/opal/mca/pmix/pmix2x/pmix/src/util/output.c index d7d36a1e92b..4ff79d596ff 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/output.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/output.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -89,7 +89,7 @@ static void construct(pmix_object_t *stream); static int do_open(int output_id, pmix_output_stream_t * lds); static int open_file(int i); static void free_descriptor(int output_id); -static int make_string(char **no_newline_string, output_desc_t *ldi, +static int make_string(char **out, char **no_newline_string, output_desc_t *ldi, const char *format, va_list arglist); static int output(int output_id, const char *format, va_list arglist); @@ -111,8 +111,6 @@ int pmix_output_redirected_syslog_pri = 0; static bool initialized = false; static int default_stderr_fd = -1; static output_desc_t info[PMIX_OUTPUT_MAX_STREAMS]; -static char *temp_str = 0; -static size_t temp_str_len = 0; #if defined(HAVE_SYSLOG) static bool syslog_opened = false; #endif @@ -356,50 +354,6 @@ void pmix_output_vverbose(int level, int output_id, const char *format, } -/* - * Send a message to a string if the verbose level is high enough - */ -char *pmix_output_string(int level, int output_id, const char *format, ...) 
-{ - int rc; - char *ret = NULL; - - if (output_id >= 0 && output_id < PMIX_OUTPUT_MAX_STREAMS && - info[output_id].ldi_verbose_level >= level) { - va_list arglist; - va_start(arglist, format); - rc = make_string(&ret, &info[output_id], format, arglist); - va_end(arglist); - if (PMIX_SUCCESS != rc) { - ret = NULL; - } - } - - return ret; -} - - -/* - * Send a message to a string if the verbose level is high enough - */ -char *pmix_output_vstring(int level, int output_id, const char *format, - va_list arglist) -{ - int rc; - char *ret = NULL; - - if (output_id >= 0 && output_id < PMIX_OUTPUT_MAX_STREAMS && - info[output_id].ldi_verbose_level >= level) { - rc = make_string(&ret, &info[output_id], format, arglist); - if (PMIX_SUCCESS != rc) { - ret = NULL; - } - } - - return ret; -} - - /* * Set the verbosity level of a stream */ @@ -501,11 +455,6 @@ void pmix_output_finalize(void) free (output_prefix); free (output_dir); - if(NULL != temp_str) { - free(temp_str); - temp_str = NULL; - temp_str_len = 0; - } PMIX_DESTRUCT(&verbose); } } @@ -813,14 +762,15 @@ static void free_descriptor(int output_id) } -static int make_string(char **no_newline_string, output_desc_t *ldi, +static int make_string(char **out, char **no_newline_string, output_desc_t *ldi, const char *format, va_list arglist) { - size_t len, total_len; + size_t len, total_len, temp_str_len; bool want_newline = false; + char *temp_str; /* Make the formatted string */ - + *out = NULL; if (0 > vasprintf(no_newline_string, format, arglist)) { return PMIX_ERR_NOMEM; } @@ -844,16 +794,11 @@ static int make_string(char **no_newline_string, output_desc_t *ldi, if (NULL != ldi->ldi_suffix) { total_len += strlen(ldi->ldi_suffix); } - if (temp_str_len < total_len + want_newline) { - if (NULL != temp_str) { - free(temp_str); - } - temp_str = (char *) malloc(total_len * 2); - if (NULL == temp_str) { - return PMIX_ERR_OUT_OF_RESOURCE; - } - temp_str_len = total_len * 2; + temp_str = (char *) malloc(total_len * 2); + if (NULL == temp_str) { + return PMIX_ERR_OUT_OF_RESOURCE; } + temp_str_len = total_len * 2; if (NULL != ldi->ldi_prefix && NULL != ldi->ldi_suffix) { if (want_newline) { snprintf(temp_str, temp_str_len, "%s%s%s\n", @@ -885,7 +830,7 @@ static int make_string(char **no_newline_string, output_desc_t *ldi, snprintf(temp_str, temp_str_len, "%s", *no_newline_string); } } - + *out = temp_str; return PMIX_SUCCESS; } @@ -897,7 +842,7 @@ static int make_string(char **no_newline_string, output_desc_t *ldi, static int output(int output_id, const char *format, va_list arglist) { int rc = PMIX_SUCCESS; - char *str, *out = NULL; + char *str=NULL, *out = NULL; output_desc_t *ldi; /* Setup */ @@ -913,8 +858,8 @@ static int output(int output_id, const char *format, va_list arglist) ldi = &info[output_id]; /* Make the strings */ - if (PMIX_SUCCESS != (rc = make_string(&str, ldi, format, arglist))) { - return rc; + if (PMIX_SUCCESS != (rc = make_string(&out, &str, ldi, format, arglist))) { + goto cleanup; } /* Syslog output -- does not use the newline-appended string */ @@ -924,15 +869,11 @@ static int output(int output_id, const char *format, va_list arglist) } #endif - /* All others (stdout, stderr, file) use temp_str, potentially - with a newline appended */ - - out = temp_str; - /* stdout output */ if (ldi->ldi_stdout) { if (0 > write(fileno(stdout), out, (int)strlen(out))) { - return PMIX_ERROR; + rc = PMIX_ERROR; + goto cleanup; } fflush(stdout); } @@ -942,7 +883,8 @@ static int output(int output_id, const char *format, va_list arglist) if (0 > 
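With the hunk above, make_string() no longer formats into the shared static temp_str; it allocates a fresh buffer, hands it back through the new leading out parameter, and output() frees both strings on its cleanup path, so concurrent streams no longer race on one global buffer. The caller-side contract, sketched with the names used in output.c:

    char *str = NULL, *out = NULL;
    int rc = make_string(&out, &str, ldi, format, arglist);
    if (PMIX_SUCCESS == rc) {
        /* 'out' holds prefix + message + optional newline; write it to the stream(s) */
    }
    if (NULL != str) {
        free(str);   /* the raw vasprintf string */
    }
    if (NULL != out) {
        free(out);   /* the assembled output string */
    }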
write((-1 == default_stderr_fd) ? fileno(stderr) : default_stderr_fd, out, (int)strlen(out))) { - return PMIX_ERROR; + rc = PMIX_ERROR; + goto cleanup; } fflush(stderr); } @@ -964,7 +906,8 @@ static int output(int output_id, const char *format, va_list arglist) "[WARNING: %d lines lost because the PMIx process session directory did\n not exist when pmix_output() was invoked]\n", ldi->ldi_file_num_lines_lost); if (0 > write(ldi->ldi_fd, buffer, (int)strlen(buffer))) { - return PMIX_ERROR; + rc = PMIX_ERROR; + goto cleanup; } ldi->ldi_file_num_lines_lost = 0; if (out != buffer) { @@ -974,13 +917,22 @@ static int output(int output_id, const char *format, va_list arglist) } if (ldi->ldi_fd != -1) { if (0 > write(ldi->ldi_fd, out, (int)strlen(out))) { - return PMIX_ERROR; + rc = PMIX_ERROR; + goto cleanup; } } } free(str); + str = NULL; } + cleanup: + if (NULL != str) { + free(str); + } + if (NULL != out) { + free(out); + } return rc; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/output.h b/opal/mca/pmix/pmix2x/pmix/src/util/output.h index 52a452a175c..78bbcf119ff 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/output.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/output.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -423,29 +423,6 @@ struct pmix_output_stream_t { void pmix_output_vverbose(int verbose_level, int output_id, const char *format, va_list ap) __pmix_attribute_format__(__printf__, 3, 0); - /** - * Send output to a string if the verbosity level is high enough. - * - * @param output_id Stream id returned from pmix_output_open(). - * @param level Target verbosity level. - * @param format printf-style format string. - * @param varargs printf-style varargs list to fill the string - * specified by the format parameter. - * - * Exactly the same as pmix_output_verbose(), except the output it - * sent to a string instead of to the stream. If the verbose - * level is not high enough, NULL is returned. The caller is - * responsible for free()'ing the returned string. - */ - char *pmix_output_string(int verbose_level, int output_id, - const char *format, ...) __pmix_attribute_format__(__printf__, 3, 4); - - /** - * Same as pmix_output_string, but accepts a va_list form of varargs. - */ - char *pmix_output_vstring(int verbose_level, int output_id, - const char *format, va_list ap) __pmix_attribute_format__(__printf__, 3, 0); - /** * Set the verbosity level for a stream. * @@ -567,4 +544,3 @@ PMIX_CLASS_DECLARATION(pmix_output_stream_t); END_C_DECLS #endif /* PMIX_OUTPUT_H_ */ - diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c index 003c3437e09..df50881b5c9 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c @@ -73,6 +73,54 @@ static void opcbfunc(pmix_status_t status, void *cbdata) *active = false; } +/* this is an event notification function that we explicitly request + * be called when the PMIX_MODEL_DECLARED notification is issued. 
+ * We could catch it in the general event notification function and test + * the status to see if the status matched, but it often is simpler + * to declare a use-specific notification callback point. In this case, + * we are asking to know whenever a model is declared as a means + * of testing server self-notification */ +static void model_callback(size_t evhdlr_registration_id, + pmix_status_t status, + const pmix_proc_t *source, + pmix_info_t info[], size_t ninfo, + pmix_info_t results[], size_t nresults, + pmix_event_notification_cbfunc_fn_t cbfunc, + void *cbdata) +{ + size_t n; + + /* just let us know it was received */ + fprintf(stderr, "%s:%d Model event handler called with status %d(%s)\n", + myproc.nspace, myproc.rank, status, PMIx_Error_string(status)); + for (n=0; n < ninfo; n++) { + if (PMIX_STRING == info[n].value.type) { + fprintf(stderr, "%s:%d\t%s:\t%s\n", + myproc.nspace, myproc.rank, + info[n].key, info[n].value.data.string); + } + } + + /* we must NOT tell the event handler state machine that we + * are the last step as that will prevent it from notifying + * anyone else that might be listening for declarations */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); + } +} + +/* event handler registration is done asynchronously */ +static void model_registration_callback(pmix_status_t status, + size_t evhandler_ref, + void *cbdata) +{ + volatile int *active = (volatile int*)cbdata; + + fprintf(stderr, "simpclient EVENT HANDLER REGISTRATION RETURN STATUS %d, ref=%lu\n", + status, (unsigned long)evhandler_ref); + *active = false; +} + int main(int argc, char **argv) { int rc; @@ -84,7 +132,9 @@ int main(int argc, char **argv) int cnt, j; bool doabort = false; volatile bool active; - pmix_info_t info; + pmix_info_t info, *iptr; + size_t ninfo; + pmix_status_t code; if (1 < argc) { if (0 == strcmp("-abort", argv[1])) { @@ -92,12 +142,16 @@ int main(int argc, char **argv) } } - /* init us */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { + /* init us and declare we are a test programming model */ + PMIX_INFO_CREATE(iptr, 2); + PMIX_INFO_LOAD(&iptr[0], PMIX_PROGRAMMING_MODEL, "TEST", PMIX_STRING); + PMIX_INFO_LOAD(&iptr[1], PMIX_MODEL_LIBRARY_NAME, "PMIX", PMIX_STRING); + if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, iptr, 2))) { pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); exit(rc); } + PMIX_INFO_FREE(iptr, 2); pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); /* test something */ @@ -110,6 +164,19 @@ int main(int argc, char **argv) } PMIX_VALUE_RELEASE(val); + /* register a handler specifically for when models declare */ + active = true; + ninfo = 1; + PMIX_INFO_CREATE(iptr, ninfo); + PMIX_INFO_LOAD(&iptr[0], PMIX_EVENT_HDLR_NAME, "SIMPCLIENT-MODEL", PMIX_STRING); + code = PMIX_MODEL_DECLARED; + PMIx_Register_event_handler(&code, 1, iptr, ninfo, + model_callback, model_registration_callback, (void*)&active); + while (active) { + usleep(10); + } + PMIX_INFO_FREE(iptr, ninfo); + /* register our errhandler */ active = true; PMIx_Register_event_handler(NULL, 0, NULL, 0, diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c index 10b236a0c51..58b89804415 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c @@ -214,9 +214,10 @@ static void model_callback(size_t evhdlr_registration_id, size_t n; /* just let us know it was received 
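For reference, the registration dance this test exercises is: pass PMIX_PROGRAMMING_MODEL / PMIX_MODEL_LIBRARY_NAME to PMIx_Init, then register a handler for the single PMIX_MODEL_DECLARED code and wait until the asynchronous registration callback flips the flag. Boiled down, reusing the callbacks defined in this hunk:

    volatile bool active = true;
    pmix_status_t code = PMIX_MODEL_DECLARED;
    pmix_info_t *iptr;

    PMIX_INFO_CREATE(iptr, 1);
    PMIX_INFO_LOAD(&iptr[0], PMIX_EVENT_HDLR_NAME, "SIMPCLIENT-MODEL", PMIX_STRING);
    PMIx_Register_event_handler(&code, 1, iptr, 1,
                                model_callback, model_registration_callback,
                                (void*)&active);
    while (active) {        /* model_registration_callback clears this */
        usleep(10);
    }
    PMIX_INFO_FREE(iptr, 1);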
*/ - fprintf(stderr, "Model event handler called with status %d(%s)\n", status, PMIx_Error_string(status)); + fprintf(stderr, "SIMPTEST: Model event handler called with status %d(%s)\n", + status, PMIx_Error_string(status)); for (n=0; n < ninfo; n++) { - if (0 == strncmp(info[n].key, PMIX_EVENT_HDLR_NAME, PMIX_MAX_KEYLEN)) { + if (PMIX_STRING == info[n].value.type) { fprintf(stderr, "\t%s:\t%s\n", info[n].key, info[n].value.data.string); } } diff --git a/opal/mca/pmix/pmix2x/pmix2x.c b/opal/mca/pmix/pmix2x/pmix2x.c index 4c7b01b6e4f..d30cd1547a9 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.c +++ b/opal/mca/pmix/pmix2x/pmix2x.c @@ -120,15 +120,32 @@ const opal_pmix_base_module_t opal_pmix_pmix2x_module = { .register_jobid = pmix2x_register_jobid }; +static void opcbfunc(pmix_status_t status, void *cbdata) +{ + pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + + OPAL_ACQUIRE_OBJECT(op); + + if (NULL != op->opcbfunc) { + op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); + } + OBJ_RELEASE(op); +} + + static const char *pmix2x_get_nspace(opal_jobid_t jobid) { opal_pmix2x_jobid_trkr_t *jptr; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { if (jptr->jobid == jobid) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return jptr->nspace; } } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return NULL; } @@ -136,9 +153,12 @@ static void pmix2x_register_jobid(opal_jobid_t jobid, const char *nspace) { opal_pmix2x_jobid_trkr_t *jptr; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + /* if we don't already have it, add this to our jobid tracker */ OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { if (jptr->jobid == jobid) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return; } } @@ -146,6 +166,7 @@ static void pmix2x_register_jobid(opal_jobid_t jobid, const char *nspace) (void)strncpy(jptr->nspace, nspace, PMIX_MAX_NSLEN); jptr->jobid = jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &jptr->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } static void event_hdlr_complete(pmix_status_t status, void *cbdata) @@ -200,42 +221,6 @@ static void return_local_event_hdlr(int status, opal_list_t *results, } } -static void _event_hdlr(int sd, short args, void *cbdata) -{ - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - opal_pmix2x_event_t *event; - - OPAL_ACQUIRE_OBJECT(cd); - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s _EVENT_HDLR RECEIVED NOTIFICATION FOR HANDLER %d OF STATUS %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (int)cd->id, cd->status); - - /* cycle thru the registrations */ - OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { - if (cd->id == event->index) { - /* found it - invoke the handler, pointing its - * callback function to our callback function */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s _EVENT_HDLR CALLING EVHDLR", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - if (NULL != event->handler) { - event->handler(cd->status, &cd->pname, - cd->info, &cd->results, - return_local_event_hdlr, (void*)cd); - return; - } - } - } - /* if we didn't find a match, we still have to call their final callback */ - if (NULL != cd->pmixcbfunc) { - cd->pmixcbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cd->cbdata); - } - OPAL_LIST_RELEASE(cd->info); - OBJ_RELEASE(cd); - return; -} - /* this function will be called by the PMIx client library * whenever it receives 
notification of an event. The * notification can come from an ORTE daemon (when launched @@ -253,15 +238,14 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, int rc; opal_value_t *iptr; size_t n; - - /* this is in the PMIx local thread - need to threadshift to - * our own thread as we will be accessing framework-global - * lists and objects */ + opal_pmix2x_event_t *event; opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s RECEIVED NOTIFICATION OF STATUS %d", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), status); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + cd = OBJ_NEW(pmix2x_threadshift_t); cd->id = evhdlr_registration_id; cd->pmixcbfunc = cbfunc; @@ -281,6 +265,7 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&cd->pname.jobid, source->nspace))) { OPAL_ERROR_LOG(rc); OBJ_RELEASE(cd); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return; } cd->pname.vpid = pmix2x_convert_rank(source->rank); @@ -315,11 +300,35 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, } } - /* now push it into the local thread */ - opal_event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _event_hdlr, cd); - OPAL_POST_OBJECT(cd); - opal_event_active(&cd->ev, EV_WRITE, 1); + /* cycle thru the registrations */ + OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { + if (evhdlr_registration_id == event->index) { + /* found it - invoke the handler, pointing its + * callback function to our callback function */ + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s _EVENT_HDLR CALLING EVHDLR", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + if (NULL != event->handler) { + OBJ_RETAIN(event); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + event->handler(cd->status, &cd->pname, + cd->info, &cd->results, + return_local_event_hdlr, cd); + OBJ_RELEASE(event); + return; + } + } + } + + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + /* if we didn't find a match, we still have to call their final callback */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); + } + OPAL_LIST_RELEASE(cd->info); + OBJ_RELEASE(cd); + return; } opal_vpid_t pmix2x_convert_rank(pmix_rank_t rank) @@ -627,6 +636,20 @@ pmix_persistence_t pmix2x_convert_opalpersist(opal_pmix_persistence_t persist) } } +char* pmix2x_convert_jobid(opal_jobid_t jobid) +{ + opal_pmix2x_jobid_trkr_t *jptr; + + /* look thru our list of jobids and find the + * corresponding nspace */ + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { + if (jptr->jobid == jobid) { + return jptr->nspace; + } + } + return NULL; +} + /**** RHC: NEED TO ADD SUPPORT FOR NEW PMIX DATA TYPES, INCLUDING **** CONVERSION OF PROC STATES ****/ @@ -1004,41 +1027,47 @@ static void errreg_cbfunc (pmix_status_t status, OBJ_RELEASE(op); } -static void _reg_hdlr(int sd, short args, void *cbdata) +static void register_handler(opal_list_t *event_codes, + opal_list_t *info, + opal_pmix_notification_fn_t evhandler, + opal_pmix_evhandler_reg_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - pmix2x_opcaddy_t *op; - opal_value_t *kv; + pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; size_t n; + opal_value_t *kv; - OPAL_ACQUIRE_OBJECT(cd); - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s REGISTER HANDLER CODES %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (NULL == cd->event_codes) ? 
"NULL" : "NON-NULL"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, 0, cbdata); + } + return; + } op = OBJ_NEW(pmix2x_opcaddy_t); - op->evregcbfunc = cd->cbfunc; - op->cbdata = cd->cbdata; + op->evregcbfunc = cbfunc; + op->cbdata = cbdata; /* convert the event codes */ - if (NULL != cd->event_codes) { - op->ncodes = opal_list_get_size(cd->event_codes); + if (NULL != event_codes) { + op->ncodes = opal_list_get_size(event_codes); op->pcodes = (pmix_status_t*)malloc(op->ncodes * sizeof(pmix_status_t)); n=0; - OPAL_LIST_FOREACH(kv, cd->event_codes, opal_value_t) { + OPAL_LIST_FOREACH(kv, event_codes, opal_value_t) { op->pcodes[n] = pmix2x_convert_opalrc(kv->data.integer); ++n; } } /* convert the list of info to an array of pmix_info_t */ - if (NULL != cd->info) { - op->ninfo = opal_list_get_size(cd->info); + if (NULL != info) { + op->ninfo = opal_list_get_size(info); if (0 < op->ninfo) { PMIX_INFO_CREATE(op->info, op->ninfo); n=0; - OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { + OPAL_LIST_FOREACH(kv, info, opal_value_t) { (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); pmix2x_value_load(&op->info[n].value, kv); ++n; @@ -1048,60 +1077,49 @@ static void _reg_hdlr(int sd, short args, void *cbdata) /* register the event */ op->event = OBJ_NEW(opal_pmix2x_event_t); - op->event->handler = cd->evhandler; + op->event->handler = evhandler; opal_list_append(&mca_pmix_pmix2x_component.events, &op->event->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + PMIx_Register_event_handler(op->pcodes, op->ncodes, op->info, op->ninfo, pmix2x_event_hdlr, errreg_cbfunc, op); - - OBJ_RELEASE(cd); return; } -static void register_handler(opal_list_t *event_codes, - opal_list_t *info, - opal_pmix_notification_fn_t evhandler, - opal_pmix_evhandler_reg_cbfunc_t cbfunc, - void *cbdata) -{ - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ - - OPAL_PMIX_THREADSHIFT(event_codes, info, evhandler, _reg_hdlr, cbfunc, cbdata); - return; -} - -static void _dereg_hdlr(int sd, short args, void *cbdata) +static void deregister_handler(size_t evhandler, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; + pmix2x_opcaddy_t *op; opal_pmix2x_event_t *event; - OPAL_ACQUIRE_OBJECT(cd); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata); + } + return; + } + /* look for this event */ OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { - if (cd->handler == event->index) { + if (evhandler == event->index) { opal_list_remove_item(&mca_pmix_pmix2x_component.events, &event->super); OBJ_RELEASE(event); break; } } - /* tell the library to deregister this handler */ - PMIx_Deregister_event_handler(cd->handler, NULL, NULL); - /* release the caller */ - if (NULL != cd->opcbfunc) { - cd->opcbfunc(OPAL_SUCCESS, cd->cbdata); - } - OBJ_RELEASE(cd); -} + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); -static void deregister_handler(size_t evhandler, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ - 
OPAL_PMIX_OP_THREADSHIFT(evhandler, _dereg_hdlr, cbfunc, cbdata); + op = OBJ_NEW(pmix2x_opcaddy_t); + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + + /* tell the library to deregister this handler */ + PMIx_Deregister_event_handler(evhandler, opcbfunc, op); return; } @@ -1114,57 +1132,56 @@ static void notify_complete(pmix_status_t status, void *cbdata) OBJ_RELEASE(op); } -static void _notify(int sd, short args, void *cbdata) +static int notify_event(int status, + const opal_process_name_t *source, + opal_pmix_data_range_t range, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t *)cbdata; pmix2x_opcaddy_t *op; opal_value_t *kv; pmix_proc_t p, *pptr; pmix_status_t pstatus; size_t n; - int rc=OPAL_SUCCESS; pmix_data_range_t prange; - opal_pmix2x_jobid_trkr_t *job, *jptr; + char *nsptr; - OPAL_ACQUIRE_OBJECT(cd); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } op = OBJ_NEW(pmix2x_opcaddy_t); /* convert the status */ - pstatus = pmix2x_convert_opalrc(cd->status); + pstatus = pmix2x_convert_opalrc(status); /* convert the source */ - if (NULL == cd->source) { + if (NULL == source) { pptr = NULL; } else { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == cd->source->jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - rc = OPAL_ERR_NOT_FOUND; - goto release; + if (NULL == (nsptr = pmix2x_convert_jobid(source->jobid))) { + OBJ_RELEASE(op); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } - (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); - p.rank = pmix2x_convert_opalrank(cd->source->vpid); + (void)strncpy(p.nspace, nsptr, PMIX_MAX_NSLEN); + p.rank = pmix2x_convert_opalrank(source->vpid); pptr = &p; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* convert the range */ - prange = pmix2x_convert_opalrange(cd->range); + prange = pmix2x_convert_opalrange(range); /* convert the list of info */ - if (NULL != cd->info) { - op->ninfo = opal_list_get_size(cd->info); + if (NULL != info) { + op->ninfo = opal_list_get_size(info); if (0 < op->ninfo) { PMIX_INFO_CREATE(op->info, op->ninfo); n=0; - OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { + OPAL_LIST_FOREACH(kv, info, opal_value_t) { (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); pmix2x_value_load(&op->info[n].value, kv); ++n; @@ -1174,26 +1191,8 @@ static void _notify(int sd, short args, void *cbdata) /* ask the library to notify our clients */ pstatus = PMIx_Notify_event(pstatus, pptr, prange, op->info, op->ninfo, notify_complete, op); - rc = pmix2x_convert_rc(pstatus); - - release: - /* release the caller */ - if (NULL != cd->opcbfunc) { - cd->opcbfunc(rc, cd->cbdata); - } - OBJ_RELEASE(cd); -} -static int notify_event(int status, - const opal_process_name_t *source, - opal_pmix_data_range_t range, - opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata) -{ - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ - OPAL_PMIX_NOTIFY_THREADSHIFT(status, source, range, info, _notify, cbfunc, cbdata); - return OPAL_SUCCESS; + return pmix2x_convert_rc(pstatus); } static void relcbfunc(void *cbdata) @@ -1254,6 +1253,14 @@ static void pmix2x_query(opal_list_t 
*queries, pmix_status_t prc; opal_pmix_query_t *q; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = OPAL_ERR_NOT_INITIALIZED; + goto CLEANUP; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* create the caddy */ cd = OBJ_NEW(pmix2x_opcaddy_t); @@ -1304,18 +1311,6 @@ static void pmix2x_query(opal_list_t *queries, return; } -static void opcbfunc(pmix_status_t status, void *cbdata) -{ - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; - - OPAL_ACQUIRE_OBJECT(op); - - if (NULL != op->opcbfunc) { - op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); - } - OBJ_RELEASE(op); -} - static void pmix2x_log(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { @@ -1325,6 +1320,14 @@ static void pmix2x_log(opal_list_t *info, pmix2x_opcaddy_t *cd; pmix_status_t prc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = OPAL_ERR_NOT_INITIALIZED; + goto CLEANUP; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* create the caddy */ cd = OBJ_NEW(pmix2x_opcaddy_t); @@ -1388,27 +1391,36 @@ OBJ_CLASS_INSTANCE(opal_pmix2x_jobid_trkr_t, static void evcon(opal_pmix2x_event_t *p) { + OPAL_PMIX_CONSTRUCT_LOCK(&p->lock); p->handler = NULL; p->cbdata = NULL; } +static void evdes(opal_pmix2x_event_t *p) +{ + OPAL_PMIX_DESTRUCT_LOCK(&p->lock); +} OBJ_CLASS_INSTANCE(opal_pmix2x_event_t, opal_list_item_t, - evcon, NULL); + evcon, evdes); static void opcon(pmix2x_opcaddy_t *p) { memset(&p->p, 0, sizeof(pmix_proc_t)); + p->nspace = NULL; p->procs = NULL; p->nprocs = 0; + p->pdata = NULL; + p->npdata = 0; p->error_procs = NULL; p->nerror_procs = 0; p->info = NULL; p->ninfo = 0; p->apps = NULL; p->sz = 0; - p->active = false; + OPAL_PMIX_CONSTRUCT_LOCK(&p->lock); p->codes = NULL; p->pcodes = NULL; + p->ncodes = 0; p->queries = NULL; p->nqueries = 0; p->event = NULL; @@ -1418,17 +1430,25 @@ static void opcon(pmix2x_opcaddy_t *p) p->lkcbfunc = NULL; p->spcbfunc = NULL; p->evregcbfunc = NULL; + p->qcbfunc = NULL; p->cbdata = NULL; } static void opdes(pmix2x_opcaddy_t *p) { + OPAL_PMIX_DESTRUCT_LOCK(&p->lock); + if (NULL != p->nspace) { + free(p->nspace); + } if (NULL != p->procs) { PMIX_PROC_FREE(p->procs, p->nprocs); } + if (NULL != p->pdata) { + PMIX_PDATA_FREE(p->pdata, p->npdata); + } if (NULL != p->error_procs) { PMIX_PROC_FREE(p->error_procs, p->nerror_procs); } - if (0 < p->ninfo) { + if (NULL != p->info) { PMIX_INFO_FREE(p->info, p->ninfo); } if (NULL != p->apps) { @@ -1473,7 +1493,9 @@ OBJ_CLASS_INSTANCE(pmix2x_opalcaddy_t, static void tscon(pmix2x_threadshift_t *p) { - p->active = false; + OPAL_PMIX_CONSTRUCT_LOCK(&p->lock); + p->msg = NULL; + p->strings = NULL; p->source = NULL; p->event_codes = NULL; p->info = NULL; @@ -1486,6 +1508,10 @@ static void tscon(pmix2x_threadshift_t *p) } static void tsdes(pmix2x_threadshift_t *p) { + OPAL_PMIX_DESTRUCT_LOCK(&p->lock); + if (NULL != p->strings) { + free(p->strings); + } OPAL_LIST_DESTRUCT(&p->results); } OBJ_CLASS_INSTANCE(pmix2x_threadshift_t, diff --git a/opal/mca/pmix/pmix2x/pmix2x.h b/opal/mca/pmix/pmix2x/pmix2x.h index ccc18728aca..129802f2bed 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.h +++ b/opal/mca/pmix/pmix2x/pmix2x.h @@ -31,7 +31,7 @@ #include "opal/mca/event/event.h" #include "opal/util/proc.h" -#include "opal/mca/pmix/pmix.h" +#include "opal/mca/pmix/base/base.h" #include "pmix_server.h" #include "pmix_common.h" @@ -62,6 +62,7 @@ 
OBJ_CLASS_DECLARATION(opal_pmix2x_jobid_trkr_t); typedef struct { opal_list_item_t super; + opal_pmix_lock_t lock; size_t index; opal_pmix_notification_fn_t handler; void *cbdata; @@ -78,17 +79,21 @@ OBJ_CLASS_DECLARATION(opal_pmix2x_dmx_trkr_t); typedef struct { opal_object_t super; + opal_event_t ev; pmix_status_t status; + char *nspace; pmix_proc_t p; pmix_proc_t *procs; size_t nprocs; + pmix_pdata_t *pdata; + size_t npdata; pmix_proc_t *error_procs; size_t nerror_procs; pmix_info_t *info; size_t ninfo; pmix_app_t *apps; size_t sz; - volatile bool active; + opal_pmix_lock_t lock; opal_list_t *codes; pmix_status_t *pcodes; size_t ncodes; @@ -127,7 +132,9 @@ OBJ_CLASS_DECLARATION(pmix2x_opalcaddy_t); typedef struct { opal_object_t super; opal_event_t ev; - volatile bool active; + opal_pmix_lock_t lock; + const char *msg; + char *strings; size_t id; int status; opal_process_name_t pname; @@ -136,6 +143,7 @@ typedef struct { opal_pmix_data_range_t range; bool nondefault; size_t handler; + opal_value_t *val; opal_list_t *event_codes; opal_list_t *info; opal_list_t results; @@ -143,6 +151,8 @@ typedef struct { opal_pmix_evhandler_reg_cbfunc_t cbfunc; opal_pmix_op_cbfunc_t opcbfunc; pmix_event_notification_cbfunc_fn_t pmixcbfunc; + opal_pmix_value_cbfunc_t valcbfunc; + opal_pmix_lookup_cbfunc_t lkcbfunc; void *cbdata; } pmix2x_threadshift_t; OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); @@ -191,6 +201,14 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); opal_event_active(&((_cd)->ev), EV_WRITE, 1); \ } while(0) +#define OPAL_PMIX2X_THREADSHIFT(p, cb) \ + do { \ + opal_event_assign(&((p)->ev), opal_pmix_base.evbase, \ + -1, EV_WRITE, (cb), (p)); \ + OPAL_POST_OBJECT(p); \ + opal_event_active(&((p)->ev), EV_WRITE, 1); \ + } while(0) + /**** CLIENT FUNCTIONS ****/ OPAL_MODULE_DECLSPEC int pmix2x_client_init(opal_list_t *ilist); OPAL_MODULE_DECLSPEC int pmix2x_client_finalize(void); @@ -296,6 +314,8 @@ OPAL_MODULE_DECLSPEC int pmix2x_value_unload(opal_value_t *kv, OPAL_MODULE_DECLSPEC opal_pmix_alloc_directive_t pmix2x_convert_allocdir(pmix_alloc_directive_t dir); +OPAL_MODULE_DECLSPEC char* pmix2x_convert_jobid(opal_jobid_t jobid); + END_C_DECLS #endif /* MCA_PMIX_EXTERNAL_H */ diff --git a/opal/mca/pmix/pmix2x/pmix2x_client.c b/opal/mca/pmix/pmix2x/pmix2x_client.c index 29605b9a41b..e4c73854101 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_client.c +++ b/opal/mca/pmix/pmix2x/pmix2x_client.c @@ -37,17 +37,6 @@ static pmix_proc_t my_proc; static char *dbgvalue=NULL; -static volatile bool regactive; -static bool initialized = false; - -#define PMIX_WAIT_FOR_COMPLETION(a) \ - do { \ - while ((a)) { \ - usleep(10); \ - } \ - OPAL_ACQUIRE_OBJECT(a); \ - } while (0) - static void errreg_cbfunc (pmix_status_t status, size_t errhandler_ref, @@ -61,8 +50,8 @@ static void errreg_cbfunc (pmix_status_t status, opal_output_verbose(5, opal_pmix_base_framework.framework_output, "PMIX client errreg_cbfunc - error handler registered status=%d, reference=%lu", status, (unsigned long)errhandler_ref); - regactive = false; - OPAL_POST_OBJECT(regactive); + OPAL_POST_OBJECT(event); + OPAL_PMIX_WAKEUP_THREAD(&event->lock); } int pmix2x_client_init(opal_list_t *ilist) @@ -79,7 +68,9 @@ int pmix2x_client_init(opal_list_t *ilist) opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client init"); - if (!initialized) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + + if (0 == opal_pmix_base.initialized) { if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { 
asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); putenv(dbgvalue); @@ -99,24 +90,28 @@ int pmix2x_client_init(opal_list_t *ilist) } } else { pinfo = NULL; + ninfo = 0; } } else { pinfo = NULL; ninfo = 0; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); rc = PMIx_Init(&my_proc, pinfo, ninfo); + if (NULL != pinfo) { + PMIX_INFO_FREE(pinfo, ninfo); + } if (PMIX_SUCCESS != rc) { return pmix2x_convert_rc(rc); } - if (0 < ninfo) { - PMIX_INFO_FREE(pinfo, ninfo); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); - } - if (initialized) { + ++opal_pmix_base.initialized; + if (1 < opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_SUCCESS; } - initialized = true; /* store our jobid and rank */ if (NULL != getenv(OPAL_MCA_PREFIX"orte_launch")) { @@ -139,44 +134,70 @@ int pmix2x_client_init(opal_list_t *ilist) pname.vpid = pmix2x_convert_rank(my_proc.rank); opal_proc_set_name(&pname); + /* release the thread in case the event handler fires when + * registered */ + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* register the default event handler */ event = OBJ_NEW(opal_pmix2x_event_t); opal_list_append(&mca_pmix_pmix2x_component.events, &event->super); PMIX_INFO_CREATE(pinfo, 1); PMIX_INFO_LOAD(&pinfo[0], PMIX_EVENT_HDLR_NAME, "OPAL-PMIX-2X-DEFAULT", PMIX_STRING); - regactive = true; - PMIx_Register_event_handler(NULL, 0, pinfo, 1, pmix2x_event_hdlr, errreg_cbfunc, event); - PMIX_WAIT_FOR_COMPLETION(regactive); + PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix2x_event_hdlr, errreg_cbfunc, event); + OPAL_PMIX_WAIT_THREAD(&event->lock); PMIX_INFO_FREE(pinfo, 1); return OPAL_SUCCESS; } +static void dereg_cbfunc(pmix_status_t st, void *cbdata) +{ + opal_pmix2x_event_t *ev = (opal_pmix2x_event_t*)cbdata; + OPAL_PMIX_WAKEUP_THREAD(&ev->lock); +} + int pmix2x_client_finalize(void) { pmix_status_t rc; - opal_pmix2x_event_t *event; + opal_pmix2x_event_t *event, *ev2; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client finalize"); - /* deregister all event handlers */ - OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { - PMIx_Deregister_event_handler(event->index, NULL, NULL); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + --opal_pmix_base.initialized; + + if (0 < opal_pmix_base.initialized) { + /* deregister all event handlers */ + OPAL_LIST_FOREACH_SAFE(event, ev2, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { + OPAL_PMIX_DESTRUCT_LOCK(&event->lock); + OPAL_PMIX_CONSTRUCT_LOCK(&event->lock); + PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event); + OPAL_PMIX_WAIT_THREAD(&event->lock); + opal_list_remove_item(&mca_pmix_pmix2x_component.events, &event->super); + OBJ_RELEASE(event); + } } - /* the list will be destructed when the component is finalized */ + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); rc = PMIx_Finalize(NULL, 0); + return pmix2x_convert_rc(rc); } int pmix2x_initialized(void) { + int init; + opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client initialized"); - return initialized; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + init = opal_pmix_base.initialized; + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + return init; } int pmix2x_abort(int flag, const char *msg, @@ -186,37 +207,35 @@ int pmix2x_abort(int flag, const char *msg, pmix_proc_t *parray=NULL; size_t n, cnt=0; opal_namelist_t *ptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; + char *nsptr; opal_output_verbose(1, opal_pmix_base_framework.framework_output, 
"PMIx_client abort"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* convert the list of procs to an array * of pmix_proc_t */ if (NULL != procs && 0 < (cnt = opal_list_get_size(procs))) { PMIX_PROC_CREATE(parray, cnt); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { PMIX_PROC_FREE(parray, cnt); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); + (void)strncpy(parray[n].nspace, nsptr, PMIX_MAX_NSLEN); parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } } - /* call the library abort */ + /* call the library abort - this is a blocking call */ rc = PMIx_Abort(flag, msg, parray, cnt); /* release the array */ @@ -230,25 +249,21 @@ int pmix2x_store_local(const opal_process_name_t *proc, opal_value_t *val) pmix_value_t kv; pmix_status_t rc; pmix_proc_t p; - opal_pmix2x_jobid_trkr_t *job, *jptr; + char *nsptr; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); if (NULL != proc) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == proc->jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == (nsptr = pmix2x_convert_jobid(proc->jobid))) { return OPAL_ERR_NOT_FOUND; } - (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); + (void)strncpy(p.nspace, nsptr, PMIX_MAX_NSLEN); p.rank = pmix2x_convert_opalrank(proc->vpid); } else { /* use our name */ @@ -259,6 +274,7 @@ int pmix2x_store_local(const opal_process_name_t *proc, opal_value_t *val) PMIX_VALUE_CONSTRUCT(&kv); pmix2x_value_load(&kv, val); + /* call the library - this is a blocking call */ rc = PMIx_Store_internal(&p, val->key, &kv); PMIX_VALUE_DESTRUCT(&kv); @@ -269,6 +285,13 @@ int pmix2x_commit(void) { pmix_status_t rc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = PMIx_Commit(); return pmix2x_convert_rc(rc); } @@ -287,39 +310,39 @@ static void opcbfunc(pmix_status_t status, void *cbdata) int pmix2x_fence(opal_list_t *procs, int collect_data) { pmix_status_t rc; - pmix_proc_t *parray=NULL; - size_t n, cnt=0; opal_namelist_t *ptr; + char *nsptr; + size_t cnt, n; + pmix_proc_t *parray = NULL; pmix_info_t info, *iptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client fence"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return 
OPAL_ERR_NOT_INITIALIZED; + } + /* convert the list of procs to an array * of pmix_proc_t */ if (NULL != procs && 0 < (cnt = opal_list_get_size(procs))) { PMIX_PROC_CREATE(parray, cnt); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { PMIX_PROC_FREE(parray, cnt); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); + (void)strncpy(parray[n].nspace, nsptr, PMIX_MAX_NSLEN); parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (collect_data) { PMIX_INFO_CONSTRUCT(&info); (void)strncpy(info.key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); @@ -332,17 +355,15 @@ int pmix2x_fence(opal_list_t *procs, int collect_data) n = 0; } - /* call the library function */ rc = PMIx_Fence(parray, cnt, iptr, n); - - /* release the array */ - PMIX_PROC_FREE(parray, cnt); - if (NULL != iptr) { + if (collect_data) { PMIX_INFO_DESTRUCT(&info); } + if (NULL != parray) { + PMIX_PROC_FREE(parray, cnt); + } return pmix2x_convert_rc(rc); - } int pmix2x_fencenb(opal_list_t *procs, int collect_data, @@ -353,14 +374,16 @@ int pmix2x_fencenb(opal_list_t *procs, int collect_data, size_t n, cnt=0; opal_namelist_t *ptr; pmix2x_opcaddy_t *op; - pmix_info_t info, *iptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; - - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + char *nsptr; opal_output_verbose(1, opal_pmix_base_framework.framework_output, - "PMIx_client fence_nb"); + "PMIx_client fencenb"); + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } /* convert the list of procs to an array * of pmix_proc_t */ @@ -368,36 +391,17 @@ int pmix2x_fencenb(opal_list_t *procs, int collect_data, PMIX_PROC_CREATE(parray, cnt); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { PMIX_PROC_FREE(parray, cnt); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); + (void)strncpy(parray[n].nspace, nsptr, PMIX_MAX_NSLEN); parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } } - - if (collect_data) { - PMIX_INFO_CONSTRUCT(&info); - (void)strncpy(info.key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); - info.value.type = PMIX_BOOL; - info.value.data.flag = true; - iptr = &info; - n = 1; - } else { - iptr = NULL; - n = 0; - } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); @@ -406,14 +410,15 @@ int pmix2x_fencenb(opal_list_t *procs, int collect_data, op->procs = parray; op->nprocs = cnt; - /* call the library function */ - rc = PMIx_Fence_nb(parray, cnt, iptr, n, opcbfunc, op); - if 
(PMIX_SUCCESS != rc) { - OBJ_RELEASE(op); + if (collect_data) { + op->ninfo = 1; + PMIX_INFO_CREATE(op->info, op->ninfo); + PMIX_INFO_LOAD(&op->info[0], PMIX_COLLECT_DATA, NULL, PMIX_BOOL); } + /* call the library function */ + rc = PMIx_Fence_nb(op->procs, op->nprocs, op->info, op->ninfo, opcbfunc, op); return pmix2x_convert_rc(rc); - } int pmix2x_put(opal_pmix_scope_t opal_scope, @@ -426,6 +431,13 @@ int pmix2x_put(opal_pmix_scope_t opal_scope, opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client put"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + PMIX_VALUE_CONSTRUCT(&kv); pmix2x_value_load(&kv, val); @@ -437,87 +449,81 @@ int pmix2x_put(opal_pmix_scope_t opal_scope, int pmix2x_get(const opal_process_name_t *proc, const char *key, opal_list_t *info, opal_value_t **val) { - int ret; - pmix_value_t *kv; pmix_status_t rc; - pmix_proc_t p, *pptr; - size_t ninfo, n; - pmix_info_t *pinfo; + pmix_proc_t p; + char *nsptr; + pmix_info_t *pinfo = NULL; + size_t sz = 0, n; opal_value_t *ival; - opal_pmix2x_jobid_trkr_t *job, *jptr; + pmix_value_t *pval = NULL; opal_output_verbose(1, opal_pmix_base_framework.framework_output, - "%s PMIx_client get on proc %s key %s", + "%s pmix2x:client get on proc %s key %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (NULL == proc) ? "NULL" : OPAL_NAME_PRINT(*proc), key); - /* prep default response */ - *val = NULL; - if (NULL != proc) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == proc->jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - return OPAL_ERR_NOT_FOUND; - } - (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); - p.rank = pmix2x_convert_opalrank(proc->vpid); - pptr = &p; - } else { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + + if (NULL == proc) { /* if they are asking for our jobid, then return it */ if (0 == strcmp(key, OPAL_PMIX_JOBID)) { (*val) = OBJ_NEW(opal_value_t); (*val)->type = OPAL_UINT32; (*val)->data.uint32 = OPAL_PROC_MY_NAME.jobid; + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_SUCCESS; - } else if (0 == strcmp(key, OPAL_PMIX_RANK)) { + } + /* if they are asking for our rank, return it */ + if (0 == strcmp(key, OPAL_PMIX_RANK)) { (*val) = OBJ_NEW(opal_value_t); (*val)->type = OPAL_INT; (*val)->data.integer = pmix2x_convert_rank(my_proc.rank); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_SUCCESS; } - pptr = NULL; } + *val = NULL; + + if (NULL == proc) { + (void)strncpy(p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); + p.rank = pmix2x_convert_rank(PMIX_RANK_WILDCARD); + } else { + if (NULL == (nsptr = pmix2x_convert_jobid(proc->jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; + } + (void)strncpy(p.nspace, nsptr, PMIX_MAX_NSLEN); + p.rank = pmix2x_convert_opalrank(proc->vpid); + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); if (NULL != info) { - ninfo = opal_list_get_size(info); - if (0 < ninfo) { - PMIX_INFO_CREATE(pinfo, ninfo); + sz = opal_list_get_size(info); + if (0 < sz) { + PMIX_INFO_CREATE(pinfo, sz); n=0; OPAL_LIST_FOREACH(ival, info, opal_value_t) { 
(void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); pmix2x_value_load(&pinfo[n].value, ival); ++n; } - } else { - pinfo = NULL; } - } else { - pinfo = NULL; - ninfo = 0; } - /* pass the request down */ - rc = PMIx_Get(pptr, key, pinfo, ninfo, &kv); + rc = PMIx_Get(&p, key, pinfo, sz, &pval); if (PMIX_SUCCESS == rc) { - if (NULL == kv) { - ret = OPAL_SUCCESS; - } else { - *val = OBJ_NEW(opal_value_t); - ret = pmix2x_value_unload(*val, kv); - PMIX_VALUE_FREE(kv, 1); - } - } else { - ret = pmix2x_convert_rc(rc); + ival = OBJ_NEW(opal_value_t); + pmix2x_value_unload(ival, pval); + *val = ival; + PMIX_VALUE_FREE(pval, 1); } - PMIX_INFO_FREE(pinfo, ninfo); - return ret; + PMIX_INFO_FREE(pinfo, sz); + + return pmix2x_convert_rc(rc); } static void val_cbfunc(pmix_status_t status, @@ -528,7 +534,7 @@ static void val_cbfunc(pmix_status_t status, opal_value_t val, *v=NULL; OPAL_ACQUIRE_OBJECT(op); - + OBJ_CONSTRUCT(&val, opal_value_t); rc = pmix2x_convert_opalrc(status); if (PMIX_SUCCESS == status && NULL != kv) { rc = pmix2x_value_unload(&val, kv); @@ -538,6 +544,7 @@ static void val_cbfunc(pmix_status_t status, if (NULL != op->valcbfunc) { op->valcbfunc(rc, v, op->cbdata); } + OBJ_DESTRUCT(&val); OBJ_RELEASE(op); } @@ -546,52 +553,73 @@ int pmix2x_getnb(const opal_process_name_t *proc, const char *key, opal_pmix_value_cbfunc_t cbfunc, void *cbdata) { pmix2x_opcaddy_t *op; + opal_value_t *val; pmix_status_t rc; + char *nsptr; size_t n; - opal_value_t *ival; - opal_pmix2x_jobid_trkr_t *job, *jptr; - - /* we must threadshift this request as we might not be in an event - * and we are going to access shared lists/objects */ opal_output_verbose(1, opal_pmix_base_framework.framework_output, "%s PMIx_client get_nb on proc %s key %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (NULL == proc) ? 
"NULL" : OPAL_NAME_PRINT(*proc), key); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + + if (NULL == proc) { + /* if they are asking for our jobid, then return it */ + if (0 == strcmp(key, OPAL_PMIX_JOBID)) { + if (NULL != cbfunc) { + val = OBJ_NEW(opal_value_t); + val->type = OPAL_UINT32; + val->data.uint32 = OPAL_PROC_MY_NAME.jobid; + cbfunc(OPAL_SUCCESS, val, cbdata); + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_SUCCESS; + } + /* if they are asking for our rank, return it */ + if (0 == strcmp(key, OPAL_PMIX_RANK)) { + if (NULL != cbfunc) { + val = OBJ_NEW(opal_value_t); + val->type = OPAL_INT; + val->data.integer = pmix2x_convert_rank(my_proc.rank); + cbfunc(OPAL_SUCCESS, val, cbdata); + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_SUCCESS; + } + } + /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); op->valcbfunc = cbfunc; op->cbdata = cbdata; - if (NULL != proc) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == proc->jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == proc) { + (void)strncpy(op->p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); + op->p.rank = pmix2x_convert_rank(PMIX_RANK_WILDCARD); + } else { + if (NULL == (nsptr = pmix2x_convert_jobid(proc->jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(op->p.nspace, job->nspace, PMIX_MAX_NSLEN); + (void)strncpy(op->p.nspace, nsptr, PMIX_MAX_NSLEN); op->p.rank = pmix2x_convert_opalrank(proc->vpid); - } else { - (void)strncpy(op->p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); - op->p.rank = pmix2x_convert_rank(PMIX_RANK_WILDCARD); } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); if (NULL != info) { op->sz = opal_list_get_size(info); if (0 < op->sz) { PMIX_INFO_CREATE(op->info, op->sz); n=0; - OPAL_LIST_FOREACH(ival, info, opal_value_t) { - (void)strncpy(op->info[n].key, ival->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->info[n].value, ival); + OPAL_LIST_FOREACH(val, info, opal_value_t) { + (void)strncpy(op->info[n].key, val->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&op->info[n].value, val); ++n; } } @@ -616,6 +644,13 @@ int pmix2x_publish(opal_list_t *info) opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client publish"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL == info) { return OPAL_ERR_BAD_PARAM; } @@ -652,6 +687,13 @@ int pmix2x_publishnb(opal_list_t *info, opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client publish_nb"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL == info) { return OPAL_ERR_BAD_PARAM; } @@ -673,60 +715,58 @@ int pmix2x_publishnb(opal_list_t *info, } ret = PMIx_Publish_nb(op->info, op->sz, opcbfunc, op); - if (0 < op->sz) { - PMIX_INFO_FREE(op->info, op->sz); - } return pmix2x_convert_rc(ret); } int pmix2x_lookup(opal_list_t *data, opal_list_t *info) { - pmix_pdata_t 
*pdata; - pmix_info_t *pinfo; - size_t sz, ninfo, n; - int rc; - pmix_status_t ret; opal_pmix_pdata_t *d; + pmix_pdata_t *pdata; + pmix_info_t *pinfo = NULL; + pmix_status_t rc; + size_t cnt, n, sz; opal_value_t *iptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; + opal_pmix2x_jobid_trkr_t *jptr, *job; - /* we must threadshift this request as we might not be in an event - * and we are going to access shared lists/objects */ opal_output_verbose(1, opal_pmix_base_framework.framework_output, - "PMIx_client lookup"); + "pmix2x:client lookup"); - if (NULL == data) { - return OPAL_ERR_BAD_PARAM; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - sz = opal_list_get_size(data); - PMIX_PDATA_CREATE(pdata, sz); - n=0; + if (NULL == data || 0 == (cnt = opal_list_get_size(data))) { + return OPAL_ERR_BAD_PARAM; + } + PMIX_PDATA_CREATE(pdata, cnt); + n = 0; OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) { - (void)strncpy(pdata[n++].key, d->value.key, PMIX_MAX_KEYLEN); + (void)strncpy(pdata[n].key, d->value.key, PMIX_MAX_KEYLEN); + ++n; } if (NULL != info) { - ninfo = opal_list_get_size(info); - PMIX_INFO_CREATE(pinfo, ninfo); - n=0; - OPAL_LIST_FOREACH(iptr, info, opal_value_t) { - (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, iptr); - ++n; + sz = opal_list_get_size(info); + if (0 < sz) { + PMIX_INFO_CREATE(pinfo, sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&pinfo[n].value, iptr); + ++n; + } } - } else { - pinfo = NULL; - ninfo = 0; } - ret = PMIx_Lookup(pdata, sz, pinfo, ninfo); - PMIX_INFO_FREE(pinfo, ninfo); - - if (PMIX_SUCCESS == ret) { - /* transfer the data back */ + rc = PMIx_Lookup(pdata, cnt, pinfo, sz); + if (PMIX_SUCCESS == rc) { + /* load the answers back into the list */ n=0; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) { if (mca_pmix_pmix2x_component.native_launch) { /* if we were launched by the OMPI RTE, then @@ -752,17 +792,15 @@ int pmix2x_lookup(opal_list_t *data, opal_list_t *info) opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); } d->proc.vpid = pmix2x_convert_rank(pdata[n].proc.rank); - rc = pmix2x_value_unload(&d->value, &pdata[n].value); - if (OPAL_SUCCESS != rc) { - OPAL_ERROR_LOG(rc); - PMIX_PDATA_FREE(pdata, sz); - return OPAL_ERR_BAD_PARAM; - } - ++n; + pmix2x_value_unload(&d->value, &pdata[n].value); } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } - - return pmix2x_convert_rc(ret); + PMIX_PDATA_FREE(pdata, cnt); + if (NULL != pinfo) { + PMIX_INFO_FREE(pinfo, sz); + } + return pmix2x_convert_rc(rc); } static void lk_cbfunc(pmix_status_t status, @@ -778,17 +816,14 @@ static void lk_cbfunc(pmix_status_t status, OPAL_ACQUIRE_OBJECT(op); - /* this is in the PMIx local thread - need to threadshift to - * our own thread as we will be accessing framework-global - * lists and objects */ - if (NULL == op->lkcbfunc) { OBJ_RELEASE(op); return; } - rc = pmix2x_convert_rc(status); + rc = pmix2x_convert_rc(op->status); if (OPAL_SUCCESS == rc) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); OBJ_CONSTRUCT(&results, opal_list_t); for (n=0; n < ndata; n++) { d = OBJ_NEW(opal_pmix_pdata_t); @@ -822,11 +857,14 @@ static void lk_cbfunc(pmix_status_t status, if (OPAL_SUCCESS != rc) { rc = OPAL_ERR_BAD_PARAM; 
OPAL_ERROR_LOG(rc); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); goto release; } } r = &results; + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } + release: /* execute the callback */ op->lkcbfunc(rc, r, op->cbdata); @@ -847,7 +885,14 @@ int pmix2x_lookupnb(char **keys, opal_list_t *info, opal_output_verbose(1, opal_pmix_base_framework.framework_output, - "PMIx_client lookup_nb"); + "pmix2x:client lookup_nb"); + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); @@ -866,7 +911,6 @@ int pmix2x_lookupnb(char **keys, opal_list_t *info, } } } - ret = PMIx_Lookup_nb(keys, op->info, op->sz, lk_cbfunc, op); return pmix2x_convert_rc(ret); @@ -879,6 +923,13 @@ int pmix2x_unpublish(char **keys, opal_list_t *info) pmix_info_t *pinfo; opal_value_t *iptr; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != info) { ninfo = opal_list_get_size(info); PMIX_INFO_CREATE(pinfo, ninfo); @@ -907,6 +958,13 @@ int pmix2x_unpublishnb(char **keys, opal_list_t *info, opal_value_t *iptr; size_t n; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); op->opcbfunc = cbfunc; @@ -932,21 +990,30 @@ int pmix2x_unpublishnb(char **keys, opal_list_t *info, int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) { - pmix_status_t ret; - pmix_info_t *pinfo = NULL; + pmix_status_t rc; + pmix_info_t *info = NULL; pmix_app_t *papps; - size_t napps, n, m, ninfo = 0; - char nspace[PMIX_MAX_NSLEN+1]; - opal_value_t *info; + size_t ninfo, napps, n, m; + opal_value_t *ival; opal_pmix_app_t *app; + char nspace[PMIX_MAX_NSLEN+1]; opal_pmix2x_jobid_trkr_t *job; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + *jobid = OPAL_JOBID_INVALID; + if (NULL != job_info && 0 < (ninfo = opal_list_get_size(job_info))) { - PMIX_INFO_CREATE(pinfo, ninfo); + PMIX_INFO_CREATE(info, ninfo); n=0; - OPAL_LIST_FOREACH(info, job_info, opal_value_t) { - (void)strncpy(pinfo[n].key, info->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, info); + OPAL_LIST_FOREACH(ival, job_info, opal_value_t) { + (void)strncpy(info[n].key, ival->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&info[n].value, ival); ++n; } } @@ -956,23 +1023,28 @@ int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) n=0; OPAL_LIST_FOREACH(app, apps, opal_pmix_app_t) { papps[n].cmd = strdup(app->cmd); - papps[n].argv = opal_argv_copy(app->argv); - papps[n].env = opal_argv_copy(app->env); + if (NULL != app->argv) { + papps[n].argv = opal_argv_copy(app->argv); + } + if (NULL != app->env) { + papps[n].env = opal_argv_copy(app->env); + } papps[n].maxprocs = app->maxprocs; if (0 < (papps[n].ninfo = opal_list_get_size(&app->info))) { PMIX_INFO_CREATE(papps[n].info, papps[n].ninfo); m=0; - 
OPAL_LIST_FOREACH(info, &app->info, opal_value_t) { - (void)strncpy(papps[n].info[m].key, info->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&papps[n].info[m].value, info); + OPAL_LIST_FOREACH(ival, &app->info, opal_value_t) { + (void)strncpy(papps[n].info[m].key, ival->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&papps[n].info[m].value, ival); ++m; } } ++n; } - ret = PMIx_Spawn(pinfo, ninfo, papps, napps, nspace); - if (PMIX_SUCCESS == ret) { + rc = PMIx_Spawn(info, ninfo, papps, napps, nspace); + if (PMIX_SUCCESS == rc) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); if (mca_pmix_pmix2x_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ @@ -987,31 +1059,26 @@ int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); job->jobid = *jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } - if (0 < ninfo) { - PMIX_INFO_FREE(pinfo, ninfo); - } - PMIX_APP_FREE(papps, napps); - - return pmix2x_convert_rc(ret); + return rc; } static void spcbfunc(pmix_status_t status, char *nspace, void *cbdata) { pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; - int rc; - opal_jobid_t jobid=OPAL_JOBID_INVALID; opal_pmix2x_jobid_trkr_t *job; + opal_jobid_t jobid; + int rc; OPAL_ACQUIRE_OBJECT(op); - /* this is in the PMIx local thread - need to threadshift to - * our own thread as we will be accessing framework-global - * lists and objects */ - rc = pmix2x_convert_rc(status); if (PMIX_SUCCESS == status) { + /* this is in the PMIx local thread - need to protect + * the framework-level data */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); if (mca_pmix_pmix2x_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ @@ -1026,6 +1093,7 @@ static void spcbfunc(pmix_status_t status, (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); job->jobid = jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } op->spcbfunc(rc, jobid, op->cbdata); @@ -1041,6 +1109,13 @@ int pmix2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, opal_value_t *info; opal_pmix_app_t *app; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); op->spcbfunc = cbfunc; @@ -1061,8 +1136,12 @@ int pmix2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, n=0; OPAL_LIST_FOREACH(app, apps, opal_pmix_app_t) { op->apps[n].cmd = strdup(app->cmd); - op->apps[n].argv = opal_argv_copy(app->argv); - op->apps[n].env = opal_argv_copy(app->env); + if (NULL != app->argv) { + op->apps[n].argv = opal_argv_copy(app->argv); + } + if (NULL != app->env) { + op->apps[n].env = opal_argv_copy(app->env); + } op->apps[n].maxprocs = app->maxprocs; if (0 < (op->apps[n].ninfo = opal_list_get_size(&app->info))) { PMIX_INFO_CREATE(op->apps[n].info, op->apps[n].ninfo); @@ -1083,43 +1162,45 @@ int pmix2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, int pmix2x_connect(opal_list_t *procs) { - pmix_status_t ret; - pmix_proc_t *parray=NULL; - size_t n, cnt=0; + pmix_proc_t *p; + size_t nprocs; opal_namelist_t *ptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; + pmix_status_t ret; + char *nsptr; + size_t n; + + 
opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "pmix2x:client connect"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == (nprocs = opal_list_get_size(procs))) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* convert the list of procs to an array * of pmix_proc_t */ - PMIX_PROC_CREATE(parray, cnt); + PMIX_PROC_CREATE(p, nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); - PMIX_PROC_FREE(parray, cnt); + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { + PMIX_PROC_FREE(p, nprocs); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(p[n].nspace, nsptr, PMIX_MAX_NSLEN); + p[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - ret = PMIx_Connect(parray, cnt, NULL, 0); - PMIX_PROC_FREE(parray, cnt); + ret = PMIx_Connect(p, nprocs, NULL, 0); + PMIX_PROC_FREE(p, nprocs); return pmix2x_convert_rc(ret); } @@ -1128,80 +1209,96 @@ int pmix2x_connectnb(opal_list_t *procs, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_status_t ret; - size_t n, cnt=0; - opal_namelist_t *ptr; pmix2x_opcaddy_t *op; - opal_pmix2x_jobid_trkr_t *job; + opal_namelist_t *ptr; + pmix_status_t ret; + char *nsptr; + size_t n; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "pmix2x:client connect NB"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == opal_list_get_size(procs)) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); op->opcbfunc = cbfunc; op->cbdata = cbdata; - op->nprocs = cnt; + op->nprocs = opal_list_get_size(procs); /* convert the list of procs to an array * of pmix_proc_t */ PMIX_PROC_CREATE(op->procs, op->nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (job->jobid == ptr->name.jobid) { - (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN); - break; - } + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { + OBJ_RELEASE(op); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } + (void)strncpy(op->procs[n].nspace, nsptr, PMIX_MAX_NSLEN); op->procs[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); ret = PMIx_Connect_nb(op->procs, op->nprocs, NULL, 0, opcbfunc, op); - + if 
(PMIX_SUCCESS != ret) { + OBJ_RELEASE(op); + } return pmix2x_convert_rc(ret); } int pmix2x_disconnect(opal_list_t *procs) { - pmix_status_t ret; - pmix_proc_t *parray=NULL; - size_t n, cnt=0; + pmix_proc_t *p; + size_t nprocs; opal_namelist_t *ptr; - opal_pmix2x_jobid_trkr_t *job; + pmix_status_t ret; + char *nsptr; + size_t n; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "pmix2x:client disconnect"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == (nprocs = opal_list_get_size(procs))) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* convert the list of procs to an array * of pmix_proc_t */ - PMIX_PROC_CREATE(parray, cnt); + PMIX_PROC_CREATE(p, nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (job->jobid == ptr->name.jobid) { - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - break; - } + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { + PMIX_PROC_FREE(p, nprocs); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } - parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(p[n].nspace, nsptr, PMIX_MAX_NSLEN); + p[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - ret = PMIx_Disconnect(parray, cnt, NULL, 0); - PMIX_PROC_FREE(parray, cnt); + ret = PMIx_Disconnect(p, nprocs, NULL, 0); + PMIX_PROC_FREE(p, nprocs); return pmix2x_convert_rc(ret); } @@ -1210,83 +1307,86 @@ int pmix2x_disconnectnb(opal_list_t *procs, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_status_t ret; - size_t n, cnt=0; - opal_namelist_t *ptr; pmix2x_opcaddy_t *op; - opal_pmix2x_jobid_trkr_t *job; + opal_namelist_t *ptr; + pmix_status_t ret; + char *nsptr; + size_t n; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "pmix2x:client disconnect NB"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == opal_list_get_size(procs)) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); op->opcbfunc = cbfunc; op->cbdata = cbdata; - op->nprocs = cnt; + op->nprocs = opal_list_get_size(procs); /* convert the list of procs to an array * of pmix_proc_t */ PMIX_PROC_CREATE(op->procs, op->nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (job->jobid == ptr->name.jobid) { - (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN); - break; - } + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { + OBJ_RELEASE(op); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } + 
(void)strncpy(op->procs[n].nspace, nsptr, PMIX_MAX_NSLEN); op->procs[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); ret = PMIx_Disconnect_nb(op->procs, op->nprocs, NULL, 0, opcbfunc, op); - + if (PMIX_SUCCESS != ret) { + OBJ_RELEASE(op); + } return pmix2x_convert_rc(ret); } - -int pmix2x_resolve_peers(const char *nodename, opal_jobid_t jobid, +int pmix2x_resolve_peers(const char *nodename, + opal_jobid_t jobid, opal_list_t *procs) { + pmix_status_t ret; char *nspace; pmix_proc_t *array=NULL; size_t nprocs, n; opal_namelist_t *nm; - int rc; - pmix_status_t ret; - opal_pmix2x_jobid_trkr_t *job, *jptr; + opal_pmix2x_jobid_trkr_t *job; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } - if (OPAL_JOBID_WILDCARD == jobid) { - nspace = NULL; - } else { - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (OPAL_JOBID_WILDCARD != jobid) { + if (NULL == (nspace = pmix2x_convert_jobid(jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - nspace = job->nspace; + } else { + nspace = NULL; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); ret = PMIx_Resolve_peers(nodename, nspace, &array, &nprocs); - rc = pmix2x_convert_rc(ret); if (NULL != array && 0 < nprocs) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); for (n=0; n < nprocs; n++) { nm = OBJ_NEW(opal_namelist_t); opal_list_append(procs, &nm->super); @@ -1300,53 +1400,38 @@ int pmix2x_resolve_peers(const char *nodename, opal_jobid_t jobid, OPAL_HASH_JOBID(array[n].nspace, nm->name.jobid); } /* if we don't already have it, add this to our jobid tracker */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == nm->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == pmix2x_convert_jobid(nm->name.jobid)) { job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); - (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); - job->jobid = jobid; + (void)strncpy(job->nspace, array[n].nspace, PMIX_MAX_NSLEN); + job->jobid = nm->name.jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); } nm->name.vpid = pmix2x_convert_rank(array[n].rank); } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } PMIX_PROC_FREE(array, nprocs); - - return rc; + return pmix2x_convert_rc(ret); } int pmix2x_resolve_nodes(opal_jobid_t jobid, char **nodelist) { pmix_status_t ret; - char *nspace=NULL; - opal_pmix2x_jobid_trkr_t *job, *jptr; + char *nsptr; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } - if (OPAL_JOBID_WILDCARD != jobid) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - return OPAL_ERR_NOT_FOUND; - } - nspace = 
job->nspace; + if (NULL == (nsptr = pmix2x_convert_jobid(jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - ret = PMIx_Resolve_nodes(nspace, nodelist); + ret = PMIx_Resolve_nodes(nsptr, nodelist); - return pmix2x_convert_rc(ret);; + return pmix2x_convert_rc(ret); } diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_north.c b/opal/mca/pmix/pmix2x/pmix2x_server_north.c index 7ba6156f166..220893a2432 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_north.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_north.c @@ -200,6 +200,10 @@ static pmix_status_t server_client_finalized_fn(const pmix_proc_t *p, void* serv opalcaddy->cbdata = cbdata; /* pass it up */ + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s FINALIZED", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); rc = host_module->client_finalized(&proc, server_object, opal_opcbfunc, opalcaddy); if (OPAL_SUCCESS != rc) { OBJ_RELEASE(opalcaddy); @@ -228,6 +232,11 @@ static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object, } proc.vpid = pmix2x_convert_rank(p->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED ABORT", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); + /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; @@ -279,10 +288,12 @@ static void opmdx_response(int status, const char *data, size_t sz, void *cbdata /* if we were collecting all data, then check for any pending * dmodx requests that we cached and notify them that the * data has arrived */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); while (NULL != (dmdx = (opal_pmix2x_dmx_trkr_t*)opal_list_remove_first(&mca_pmix_pmix2x_component.dmdx))) { dmdx->cbfunc(PMIX_SUCCESS, NULL, 0, dmdx->cbdata, NULL, NULL); OBJ_RELEASE(dmdx); } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } else { OBJ_RELEASE(opalcaddy); } @@ -299,6 +310,9 @@ static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, opal_value_t *iptr; int rc; + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s FENCE CALLED", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + if (NULL == host_module || NULL == host_module->fence_nb) { return PMIX_ERR_NOT_SUPPORTED; } @@ -359,6 +373,11 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, } proc.vpid = pmix2x_convert_rank(p->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED DMODX", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); + /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->mdxcbfunc = cbfunc; @@ -372,10 +391,12 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, * arrived - this will trigger the pmix server to tell the * client that the data is available */ if (opal_pmix_base_async_modex && opal_pmix_collect_all_data) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); dmdx = OBJ_NEW(opal_pmix2x_dmx_trkr_t); dmdx->cbfunc = cbfunc; dmdx->cbdata = cbdata; opal_list_append(&mca_pmix_pmix2x_component.dmdx, &dmdx->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return PMIX_SUCCESS; } @@ -421,6 +442,11 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p, } proc.vpid = pmix2x_convert_rank(p->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED PUBLISH", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + 
OPAL_NAME_PRINT(proc)); + /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; @@ -497,6 +523,11 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, char **keys, } proc.vpid = pmix2x_convert_rank(p->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED LOOKUP", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); + /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->lkupcbfunc = cbfunc; @@ -543,6 +574,11 @@ static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys, } proc.vpid = pmix2x_convert_rank(p->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED UNPUBLISH", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); + /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; @@ -767,6 +803,10 @@ static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes, opal_value_t *oinfo; int rc; + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s REGISTER EVENTS", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; @@ -795,6 +835,9 @@ static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes, static pmix_status_t server_deregister_events(pmix_status_t *codes, size_t ncodes, pmix_op_cbfunc_t cbfunc, void *cbdata) { + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s DEREGISTER EVENTS", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + return PMIX_ERR_NOT_SUPPORTED; } @@ -829,6 +872,11 @@ static pmix_status_t server_notify_event(pmix_status_t code, } src.vpid = pmix2x_convert_rank(source->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED NOTIFY", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(src)); + /* ignore the range for now */ /* convert the info */ @@ -925,6 +973,11 @@ static pmix_status_t server_query(pmix_proc_t *proct, } requestor.vpid = pmix2x_convert_rank(proct->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED QUERY", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(requestor)); + /* convert the queries */ for (n=0; n < nqueries; n++) { q = OBJ_NEW(opal_pmix_query_t); diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_south.c b/opal/mca/pmix/pmix2x/pmix2x_server_south.c index ba8dd082efe..2a26e2cdb55 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_south.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_south.c @@ -52,29 +52,20 @@ extern pmix_server_module_t mymodule; extern opal_pmix_server_module_t *host_module; static char *dbgvalue=NULL; -static size_t errhdler_ref = 0; - -#define PMIX_WAIT_FOR_COMPLETION(a) \ - do { \ - while ((a)) { \ - usleep(10); \ - } \ - OPAL_ACQUIRE_OBJECT(a); \ - } while (0) static void errreg_cbfunc (pmix_status_t status, size_t errhandler_ref, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + opal_pmix2x_event_t *ev = (opal_pmix2x_event_t*)cbdata; - OPAL_ACQUIRE_OBJECT(active); - errhdler_ref = errhandler_ref; + OPAL_ACQUIRE_OBJECT(ev); + ev->index = errhandler_ref; opal_output_verbose(5, opal_pmix_base_framework.framework_output, "PMIX server errreg_cbfunc - error handler registered status=%d, reference=%lu", status, (unsigned long)errhandler_ref); - OPAL_POST_OBJECT(active); - *active = false; + OPAL_POST_OBJECT(ev); + OPAL_PMIX_WAKEUP_THREAD(&ev->lock); } 
static void opcbfunc(pmix_status_t status, void *cbdata) @@ -86,21 +77,15 @@ static void opcbfunc(pmix_status_t status, void *cbdata) if (NULL != op->opcbfunc) { op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); } - if (op->active) { - op->status = status; - OPAL_POST_OBJECT(op); - op->active = false; - } else { - OBJ_RELEASE(op); - } + OBJ_RELEASE(op); } -static void op2cbfunc(pmix_status_t status, void *cbdata) +static void lkcbfunc(pmix_status_t status, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + opal_pmix_lock_t *lk = (opal_pmix_lock_t*)cbdata; - OPAL_POST_OBJECT(active); - *active = false; + OPAL_POST_OBJECT(lk); + OPAL_PMIX_WAKEUP_THREAD(lk); } int pmix2x_server_init(opal_pmix_server_module_t *module, @@ -111,13 +96,19 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, opal_value_t *kv; pmix_info_t *pinfo; size_t sz, n; - volatile bool active; + opal_pmix2x_event_t *event; opal_pmix2x_jobid_trkr_t *job; + opal_pmix_lock_t lk; - if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { - asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); - putenv(dbgvalue); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + + if (0 == opal_pmix_base.initialized) { + if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { + asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); + putenv(dbgvalue); + } } + ++opal_pmix_base.initialized; /* convert the list to an array of pmix_info_t */ if (NULL != info) { @@ -140,6 +131,7 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, (void)opal_snprintf_jobid(job->nspace, PMIX_MAX_NSLEN, OPAL_PROC_MY_NAME.jobid); job->jobid = OPAL_PROC_MY_NAME.jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, pinfo, sz))) { PMIX_INFO_FREE(pinfo, sz); @@ -151,41 +143,53 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, host_module = module; /* register the default event handler */ - active = true; + event = OBJ_NEW(opal_pmix2x_event_t); + opal_list_append(&mca_pmix_pmix2x_component.events, &event->super); PMIX_INFO_CREATE(pinfo, 1); PMIX_INFO_LOAD(&pinfo[0], PMIX_EVENT_HDLR_NAME, "OPAL-PMIX-2X-SERVER-DEFAULT", PMIX_STRING); - PMIx_Register_event_handler(NULL, 0, pinfo, 1, pmix2x_event_hdlr, errreg_cbfunc, (void*)&active); - PMIX_WAIT_FOR_COMPLETION(active); + PMIx_Register_event_handler(NULL, 0, pinfo, 1, pmix2x_event_hdlr, errreg_cbfunc, (void*)event); + OPAL_PMIX_WAIT_THREAD(&event->lock); PMIX_INFO_FREE(pinfo, 1); /* as we might want to use some client-side functions, be sure * to register our own nspace */ + OPAL_PMIX_CONSTRUCT_LOCK(&lk); PMIX_INFO_CREATE(pinfo, 1); PMIX_INFO_LOAD(&pinfo[0], PMIX_REGISTER_NODATA, NULL, PMIX_BOOL); - active = true; - PMIx_server_register_nspace(job->nspace, 1, pinfo, 1, op2cbfunc, (void*)&active); - PMIX_WAIT_FOR_COMPLETION(active); + PMIx_server_register_nspace(job->nspace, 1, pinfo, 1, lkcbfunc, (void*)&lk); + OPAL_PMIX_WAIT_THREAD(&lk); + OPAL_PMIX_DESTRUCT_LOCK(&lk); PMIX_INFO_FREE(pinfo, 1); return OPAL_SUCCESS; } -static void fincb(pmix_status_t status, void *cbdata) +static void dereg_cbfunc(pmix_status_t st, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; - OPAL_POST_OBJECT(active); - *active = false; + opal_pmix2x_event_t *ev = (opal_pmix2x_event_t*)cbdata; + OPAL_PMIX_WAKEUP_THREAD(&ev->lock); } int pmix2x_server_finalize(void) { pmix_status_t rc; - volatile bool active; - - /* deregister the 
default event handler */ - active = true; - PMIx_Deregister_event_handler(errhdler_ref, fincb, (void*)&active); - PMIX_WAIT_FOR_COMPLETION(active); + opal_pmix2x_event_t *event, *ev2; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + --opal_pmix_base.initialized; + + if (0 < opal_pmix_base.initialized) { + /* deregister all event handlers */ + OPAL_LIST_FOREACH_SAFE(event, ev2, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { + OPAL_PMIX_DESTRUCT_LOCK(&event->lock); + OPAL_PMIX_CONSTRUCT_LOCK(&event->lock); + PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event); + OPAL_PMIX_WAIT_THREAD(&event->lock); + opal_list_remove_item(&mca_pmix_pmix2x_component.events, &event->super); + OBJ_RELEASE(event); + } + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); rc = PMIx_server_finalize(); return pmix2x_convert_rc(rc); @@ -195,6 +199,13 @@ int pmix2x_server_gen_regex(const char *input, char **regex) { pmix_status_t rc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = PMIx_generate_regex(input, regex); return pmix2x_convert_rc(rc); } @@ -204,13 +215,23 @@ int pmix2x_server_gen_ppn(const char *input, char **ppn) { pmix_status_t rc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = PMIx_generate_ppn(input, ppn); return pmix2x_convert_rc(rc); } -static void _reg_nspace(int sd, short args, void *cbdata) +int pmix2x_server_register_nspace(opal_jobid_t jobid, + int nlocalprocs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; opal_value_t *kv, *k2; pmix_info_t *pinfo = NULL, *pmap; size_t sz, szmap, m, n; @@ -218,28 +239,31 @@ static void _reg_nspace(int sd, short args, void *cbdata) pmix_status_t rc; opal_list_t *pmapinfo; opal_pmix2x_jobid_trkr_t *job; - pmix2x_opcaddy_t op; - - OPAL_ACQUIRE_OBJECT(cd); + opal_pmix_lock_t lock; + int ret; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } /* convert the jobid */ - (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, cd->jobid); + (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, jobid); /* store this job in our list of known nspaces */ job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); - job->jobid = cd->jobid; + job->jobid = jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* convert the list to an array of pmix_info_t */ - if (NULL != cd->info) { - sz = opal_list_get_size(cd->info); + if (NULL != info) { + sz = opal_list_get_size(info); PMIX_INFO_CREATE(pinfo, sz); n = 0; - OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { + OPAL_LIST_FOREACH(kv, info, opal_value_t) { (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); if (0 == strcmp(kv->key, OPAL_PMIX_PROC_DATA)) { pinfo[n].value.type = PMIX_DATA_ARRAY; @@ -269,115 +293,63 @@ static void _reg_nspace(int sd, short args, void *cbdata) pinfo = NULL; } - 
OBJ_CONSTRUCT(&op, pmix2x_opcaddy_t); - op.active = true; - rc = PMIx_server_register_nspace(nspace, cd->status, pinfo, sz, - opcbfunc, (void*)&op); + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + rc = PMIx_server_register_nspace(nspace, nlocalprocs, pinfo, sz, + lkcbfunc, (void*)&lock); if (PMIX_SUCCESS == rc) { - PMIX_WAIT_FOR_COMPLETION(op.active); - } else { - op.status = rc; - } - /* ensure we execute the cbfunc so the caller doesn't hang */ - if (NULL != cd->opcbfunc) { - cd->opcbfunc(pmix2x_convert_rc(op.status), cd->cbdata); + OPAL_PMIX_WAIT_THREAD(&lock); } + OPAL_PMIX_DESTRUCT_LOCK(&lock); + if (NULL != pinfo) { PMIX_INFO_FREE(pinfo, sz); } - OBJ_DESTRUCT(&op); - OBJ_RELEASE(cd); -} - -int pmix2x_server_register_nspace(opal_jobid_t jobid, - int nlocalprocs, - opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - pmix2x_threadshift_t *cd; - - /* we must threadshift this request as it touches - * shared lists of objects */ - cd = OBJ_NEW(pmix2x_threadshift_t); - cd->jobid = jobid; - cd->status = nlocalprocs; - cd->info = info; - cd->opcbfunc = cbfunc; - cd->cbdata = cbdata; - /* if the cbfunc is NULL, then the caller is in an event - * and we can directly call the processing function */ - if (NULL == cbfunc) { - _reg_nspace(0, 0, cd); - } else { - opal_event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _reg_nspace, cd); - OPAL_POST_OBJECT(cd); - opal_event_active(&cd->ev, EV_WRITE, 1); - } - - return OPAL_SUCCESS; -} -static void tdcbfunc(pmix_status_t status, void *cbdata) -{ - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; + ret = pmix2x_convert_rc(rc); - OPAL_ACQUIRE_OBJECT(cd); - if (NULL != cd->opcbfunc) { - cd->opcbfunc(pmix2x_convert_rc(status), cd->cbdata); - } - if (cd->active) { - OPAL_POST_OBJECT(cd); - cd->active = false; - } else { - OBJ_RELEASE(cd); + /* release the caller */ + if (NULL != cbfunc) { + cbfunc(ret, cbdata); } + return ret; } -static void _dereg_nspace(int sd, short args, void *cbdata) +void pmix2x_server_deregister_nspace(opal_jobid_t jobid, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; opal_pmix2x_jobid_trkr_t *jptr; + opal_pmix_lock_t lock; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* release the caller */ + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata); + } + return; + } - OPAL_ACQUIRE_OBJECT(cd); /* if we don't already have it, we can ignore this */ OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == cd->jobid) { + if (jptr->jobid == jobid) { /* found it - tell the server to deregister */ - cd->active = true; - PMIx_server_deregister_nspace(jptr->nspace, tdcbfunc, cd); - PMIX_WAIT_FOR_COMPLETION(cd->active); - OBJ_RELEASE(cd); + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + PMIx_server_deregister_nspace(jptr->nspace, lkcbfunc, (void*)&lock); + OPAL_PMIX_WAIT_THREAD(&lock); + OPAL_PMIX_DESTRUCT_LOCK(&lock); /* now get rid of it from our list */ opal_list_remove_item(&mca_pmix_pmix2x_component.jobids, &jptr->super); OBJ_RELEASE(jptr); - return; + break; } } - /* must release the caller */ - tdcbfunc(PMIX_ERR_NOT_FOUND, cd); -} -void pmix2x_server_deregister_nspace(opal_jobid_t jobid, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - pmix2x_threadshift_t *cd; - - /* we must threadshift this request as it touches - * shared lists of objects */ - cd = OBJ_NEW(pmix2x_threadshift_t); - cd->jobid = 
jobid; - cd->opcbfunc = cbfunc; - cd->cbdata = cbdata; - if (NULL == cbfunc) { - _dereg_nspace(0, 0, cd); - } else { - opal_event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _dereg_nspace, cd); - OPAL_POST_OBJECT(cd); - opal_event_active(&cd->ev, EV_WRITE, 1); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* release the caller */ + if (NULL != cbfunc) { + cbfunc(OPAL_SUCCESS, cbdata); } } @@ -389,67 +361,64 @@ int pmix2x_server_register_client(const opal_process_name_t *proc, { pmix_status_t rc; pmix_proc_t p; - pmix2x_opcaddy_t op; + opal_pmix_lock_t lock; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* convert the jobid */ (void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc->jobid); p.rank = pmix2x_convert_opalrank(proc->vpid); - OBJ_CONSTRUCT(&op, pmix2x_opcaddy_t); - op.active = true; + OPAL_PMIX_CONSTRUCT_LOCK(&lock); rc = PMIx_server_register_client(&p, uid, gid, server_object, - opcbfunc, (void*)&op); + lkcbfunc, (void*)&lock); if (PMIX_SUCCESS == rc) { - PMIX_WAIT_FOR_COMPLETION(op.active); - rc = op.status; + OPAL_PMIX_WAIT_THREAD(&lock); } - OBJ_DESTRUCT(&op); + OPAL_PMIX_DESTRUCT_LOCK(&lock); return pmix2x_convert_rc(rc); } -static void _dereg_client(int sd, short args, void *cbdata) +/* tell the local PMIx server to cleanup this client as it is + * done executing */ +void pmix2x_server_deregister_client(const opal_process_name_t *proc, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; opal_pmix2x_jobid_trkr_t *jptr; pmix_proc_t p; + opal_pmix_lock_t lock; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata); + } + return; + } - OPAL_ACQUIRE_OBJECT(cd); /* if we don't already have it, we can ignore this */ OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == cd->source->jobid) { + if (jptr->jobid == proc->jobid) { /* found it - tell the server to deregister */ (void)strncpy(p.nspace, jptr->nspace, PMIX_MAX_NSLEN); - p.rank = pmix2x_convert_opalrank(cd->source->vpid); - cd->active = true; - PMIx_server_deregister_client(&p, tdcbfunc, (void*)cd); - PMIX_WAIT_FOR_COMPLETION(cd->active); + p.rank = pmix2x_convert_opalrank(proc->vpid); + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + PMIx_server_deregister_client(&p, lkcbfunc, (void*)&lock); + OPAL_PMIX_WAIT_THREAD(&lock); + OPAL_PMIX_DESTRUCT_LOCK(&lock); break; } } - OBJ_RELEASE(cd); -} - -/* tell the local PMIx server to cleanup this client as it is - * done executing */ -void pmix2x_server_deregister_client(const opal_process_name_t *proc, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - pmix2x_threadshift_t *cd; - - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ - cd = OBJ_NEW(pmix2x_threadshift_t); - cd->source = proc; - cd->opcbfunc = cbfunc; - cd->cbdata = cbdata; - if (NULL == cbfunc) { - _dereg_client(0, 0, cd); - } else { - opal_event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _dereg_client, cd); - OPAL_POST_OBJECT(cd); - opal_event_active(&cd->ev, EV_WRITE, 1); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_SUCCESS, 
cbdata); } } @@ -459,6 +428,13 @@ int pmix2x_server_setup_fork(const opal_process_name_t *proc, char ***env) pmix_status_t rc; pmix_proc_t p; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* convert the jobid */ (void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc->jobid); p.rank = pmix2x_convert_opalrank(proc->vpid); @@ -489,6 +465,13 @@ int pmix2x_server_dmodex(const opal_process_name_t *proc, pmix2x_opcaddy_t *op; pmix_status_t rc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* setup the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); op->mdxcbfunc = cbfunc; @@ -518,6 +501,13 @@ int pmix2x_server_notify_event(int status, pmix_status_t rc; pmix2x_opcaddy_t *op; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* convert the list to an array of pmix_info_t */ if (NULL != info) { sz = opal_list_get_size(info); diff --git a/orte/mca/errmgr/default_app/errmgr_default_app.c b/orte/mca/errmgr/default_app/errmgr_default_app.c index 20f288f7060..065f2de28e3 100644 --- a/orte/mca/errmgr/default_app/errmgr_default_app.c +++ b/orte/mca/errmgr/default_app/errmgr_default_app.c @@ -32,6 +32,7 @@ #include "orte/util/error_strings.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #include "orte/mca/rml/rml.h" @@ -69,10 +70,10 @@ static size_t myerrhandle = SIZE_MAX; static void register_cbfunc(int status, size_t errhndler, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + orte_lock_t *lk = (orte_lock_t*)cbdata; myerrhandle = errhndler; - ORTE_POST_OBJECT(active); - *active = false; + ORTE_POST_OBJECT(lk); + ORTE_WAKEUP_THREAD(lk); } static void notify_cbfunc(int status, @@ -116,22 +117,23 @@ static void notify_cbfunc(int status, static int init(void) { opal_list_t directives; - volatile bool active; + orte_lock_t lock; opal_value_t *kv; /* setup state machine to trap proc errors */ orte_state.add_proc_state(ORTE_PROC_STATE_ERROR, proc_errors, ORTE_ERROR_PRI); /* tie the default PMIx event handler back to us */ - active = true; + ORTE_CONSTRUCT_LOCK(&lock); OBJ_CONSTRUCT(&directives, opal_list_t); kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_EVENT_HDLR_NAME); kv->type = OPAL_STRING; kv->data.string = strdup("ORTE-APP-DEFAULT"); opal_list_append(&directives, &kv->super); - opal_pmix.register_evhandler(NULL, &directives, notify_cbfunc, register_cbfunc, (void*)&active); - ORTE_WAIT_FOR_COMPLETION(active); + opal_pmix.register_evhandler(NULL, &directives, notify_cbfunc, register_cbfunc, (void*)&lock); + ORTE_WAIT_THREAD(&lock); + ORTE_DESTRUCT_LOCK(&lock); OPAL_LIST_DESTRUCT(&directives); return ORTE_SUCCESS; diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c index 13b550a8d99..0915e726e61 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.c +++ b/orte/mca/oob/tcp/oob_tcp_component.c @@ -185,7 +185,7 @@ static int tcp_component_open(void) static int tcp_component_close(void) { /* 
cleanup listen event list */ - OBJ_DESTRUCT(&mca_oob_tcp_component.listeners); + OPAL_LIST_DESTRUCT(&mca_oob_tcp_component.listeners); OBJ_DESTRUCT(&mca_oob_tcp_component.peers); @@ -695,27 +695,11 @@ static int component_startup(void) return rc; } -static void cleanup(int sd, short args, void *cbdata) -{ - opal_list_item_t * item; - bool *active = (bool*)cbdata; - - ORTE_ACQUIRE_OBJECT(active); - - while (NULL != (item = opal_list_remove_first(&mca_oob_tcp_component.listeners))) { - OBJ_RELEASE(item); - } - if (NULL != active) { - *active = false; - } -} - static void component_shutdown(void) { mca_oob_tcp_peer_t *peer; uint64_t ui64; int i = 0; - bool active; opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s TCP SHUTDOWN", @@ -750,24 +734,6 @@ static void component_shutdown(void) "no hnp or not active"); } - /* because the listeners are in a separate - * async thread for apps, we can't just release them here. - * Instead, we push it into that event thread and release - * them there */ - if (ORTE_PROC_IS_APP) { - opal_event_t ev; - active = true; - opal_event_set(orte_event_base, &ev, -1, - OPAL_EV_WRITE, cleanup, &active); - opal_event_set_priority(&ev, ORTE_ERROR_PRI); - ORTE_POST_OBJECT(active); - opal_event_active(&ev, OPAL_EV_WRITE, 1); - ORTE_WAIT_FOR_COMPLETION(active); - } else { - /* we can call the destruct directly */ - cleanup(0, 0, NULL); - } - opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s TCP SHUTDOWN done", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); diff --git a/orte/mca/rml/base/rml_base_frame.c b/orte/mca/rml/base/rml_base_frame.c index 7b0798cdb41..790b90b72a9 100644 --- a/orte/mca/rml/base/rml_base_frame.c +++ b/orte/mca/rml/base/rml_base_frame.c @@ -86,19 +86,19 @@ static int orte_rml_base_register(mca_base_register_flag_t flags) static void cleanup(int sd, short args, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + orte_lock_t *lk = (orte_lock_t*)cbdata; ORTE_ACQUIRE_OBJECT(active); OPAL_LIST_DESTRUCT(&orte_rml_base.posted_recvs); - if (NULL != active) { - ORTE_POST_OBJECT(active); - *active = false; + if (NULL != lk) { + ORTE_POST_OBJECT(lk); + ORTE_WAKEUP_THREAD(lk); } } static int orte_rml_base_close(void) { - volatile bool active; + orte_lock_t lock; int idx, total_conduits = opal_pointer_array_get_size(&orte_rml_base.conduits); orte_rml_base_module_t *mod; orte_rml_component_t *comp; @@ -127,13 +127,14 @@ static int orte_rml_base_close(void) * it there */ if (ORTE_PROC_IS_APP) { opal_event_t ev; - active = true; + ORTE_CONSTRUCT_LOCK(&lock); opal_event_set(orte_event_base, &ev, -1, - OPAL_EV_WRITE, cleanup, (void*)&active); + OPAL_EV_WRITE, cleanup, (void*)&lock); opal_event_set_priority(&ev, ORTE_ERROR_PRI); ORTE_POST_OBJECT(ev); opal_event_active(&ev, OPAL_EV_WRITE, 1); - ORTE_WAIT_FOR_COMPLETION(active); + ORTE_WAIT_THREAD(&lock); + ORTE_DESTRUCT_LOCK(&lock); } else { /* we can call the destruct directly */ cleanup(0, 0, NULL); diff --git a/orte/orted/pmix/pmix_server_dyn.c b/orte/orted/pmix/pmix_server_dyn.c index ad2e80c374b..8eacbbfe401 100644 --- a/orte/orted/pmix/pmix_server_dyn.c +++ b/orte/orted/pmix/pmix_server_dyn.c @@ -279,11 +279,20 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor, jdata->num_apps++; if (NULL != papp->cmd) { app->app = strdup(papp->cmd); + } else if (NULL == papp->argv || + NULL == papp->argv[0]) { + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + OBJ_RELEASE(jdata); + return ORTE_ERR_BAD_PARAM; } else { app->app = strdup(papp->argv[0]); } - app->argv = 
opal_argv_copy(papp->argv); - app->env = opal_argv_copy(papp->env); + if (NULL != papp->argv) { + app->argv = opal_argv_copy(papp->argv); + } + if (NULL != papp->env) { + app->env = opal_argv_copy(papp->env); + } if (NULL != papp->cwd) { app->cwd = strdup(papp->cwd); } diff --git a/orte/util/comm/comm.c b/orte/util/comm/comm.c index 426cbc4a69c..b4944db05de 100644 --- a/orte/util/comm/comm.c +++ b/orte/util/comm/comm.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2010-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,6 +38,7 @@ #include "orte/mca/rml/base/rml_contact.h" #include "orte/mca/routed/routed.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" @@ -807,4 +808,3 @@ int orte_util_comm_halt_vm(const orte_process_name_t *hnp) CLEANUP: return rc; } - diff --git a/orte/util/threads.h b/orte/util/threads.h index 63d2dcd3035..5bd1be82b5b 100644 --- a/orte/util/threads.h +++ b/orte/util/threads.h @@ -13,6 +13,7 @@ #include "orte_config.h" #include "opal/sys/atomic.h" +#include "opal/threads/threads.h" /* provide macros for forward-proofing the shifting * of objects between threads - at some point, we @@ -26,6 +27,12 @@ * we only have a memory barrier */ #define ORTE_ACQUIRE_OBJECT(o) opal_atomic_rmb() +#define orte_condition_wait(a,b) pthread_cond_wait(a, &(b)->m_lock_pthread) +typedef pthread_cond_t orte_condition_t; +#define orte_condition_broadcast(a) pthread_cond_broadcast(a) +#define orte_condition_signal(a) pthread_cond_signal(a) +#define ORTE_CONDITION_STATIC_INIT PTHREAD_COND_INITIALIZER + /* define a threadshift macro */ #define ORTE_THREADSHIFT(x, eb, f, p) \ do { \ @@ -35,4 +42,118 @@ opal_event_active(&((x)->ev), OPAL_EV_WRITE, 1); \ } while(0) +typedef struct { + opal_mutex_t mutex; + orte_condition_t cond; + volatile bool active; +} orte_lock_t; + +#define ORTE_CONSTRUCT_LOCK(l) \ + do { \ + OBJ_CONSTRUCT(&(l)->mutex, opal_mutex_t); \ + pthread_cond_init(&(l)->cond, NULL); \ + (l)->active = true; \ + } while(0) + +#define ORTE_DESTRUCT_LOCK(l) \ + do { \ + OBJ_DESTRUCT(&(l)->mutex); \ + pthread_cond_destroy(&(l)->cond); \ + } while(0) + + +#if OPAL_ENABLE_DEBUG +#define ORTE_ACQUIRE_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + if (opal_debug_threads) { \ + opal_output(0, "Waiting for thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + while ((lck)->active) { \ + orte_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + if (opal_debug_threads) { \ + opal_output(0, "Thread obtained %s:%d", \ + __FILE__, __LINE__); \ + } \ + (lck)->active = true; \ + OPAL_ACQUIRE_OBJECT(lck); \ + } while(0) +#else +#define ORTE_ACQUIRE_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + orte_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + (lck)->active = true; \ + OPAL_ACQUIRE_OBJECT(lck); \ + } while(0) +#endif + + +#if OPAL_ENABLE_DEBUG +#define ORTE_WAIT_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + if (opal_debug_threads) { \ + opal_output(0, "Waiting for thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + while ((lck)->active) { \ + orte_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + if (opal_debug_threads) { \ + opal_output(0, "Thread obtained %s:%d", \ + __FILE__, __LINE__); \ + } \ + OPAL_ACQUIRE_OBJECT(&lck); \ + 
opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#else +#define ORTE_WAIT_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + orte_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + OPAL_ACQUIRE_OBJECT(lck); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#endif + + +#if OPAL_ENABLE_DEBUG +#define ORTE_RELEASE_THREAD(lck) \ + do { \ + if (opal_debug_threads) { \ + opal_output(0, "Releasing thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + (lck)->active = false; \ + OPAL_POST_OBJECT(lck); \ + orte_condition_broadcast(&(lck)->cond); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#else +#define ORTE_RELEASE_THREAD(lck) \ + do { \ + (lck)->active = false; \ + OPAL_POST_OBJECT(lck); \ + orte_condition_broadcast(&(lck)->cond); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#endif + + +#define ORTE_WAKEUP_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + (lck)->active = false; \ + OPAL_POST_OBJECT(lck); \ + orte_condition_broadcast(&(lck)->cond); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) + #endif /* ORTE_THREADS_H */
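The orte_lock_t introduced above replaces the old ORTE_WAIT_FOR_COMPLETION / PMIX_WAIT_FOR_COMPLETION busy-wait on a volatile bool with a mutex/condition-variable handshake. Below is a minimal sketch (not part of the patch) of the intended caller/callback pattern; it assumes compilation inside the ORTE tree, and fake_async_operation() and example_cbfunc() are hypothetical stand-ins for any ORTE/OPAL API that completes through a callback (compare register_cbfunc() in errmgr_default_app.c and cleanup() in rml_base_frame.c above).

/*
 * Sketch only: fake_async_operation() and example_cbfunc() are hypothetical;
 * the orte_lock_t type and the ORTE_*_LOCK / ORTE_*_THREAD macros are the
 * ones added to orte/util/threads.h in this patch.
 */
#include "orte/constants.h"
#include "orte/util/threads.h"

/* stand-in for an asynchronous call - a real API would invoke the callback
 * later from its progress thread; it is invoked inline here only to keep
 * the sketch self-contained */
static void fake_async_operation(void (*cbfunc)(int, void*), void *cbdata)
{
    cbfunc(ORTE_SUCCESS, cbdata);
}

static void example_cbfunc(int status, void *cbdata)
{
    orte_lock_t *lk = (orte_lock_t*)cbdata;
    (void)status;

    /* store any results on the tracking object, then wake the waiter */
    ORTE_POST_OBJECT(lk);       /* write barrier before releasing the waiter */
    ORTE_WAKEUP_THREAD(lk);     /* sets lk->active = false and broadcasts the cond */
}

static int example_wait_for_completion(void)
{
    orte_lock_t lock;

    ORTE_CONSTRUCT_LOCK(&lock);                   /* init mutex + cond, active = true */
    fake_async_operation(example_cbfunc, &lock);  /* callback receives the lock as cbdata */
    ORTE_WAIT_THREAD(&lock);                      /* sleeps on the cond - no usleep() polling */
    ORTE_DESTRUCT_LOCK(&lock);
    return ORTE_SUCCESS;
}

The OPAL_PMIX_CONSTRUCT_LOCK / OPAL_PMIX_WAIT_THREAD / OPAL_PMIX_WAKEUP_THREAD macros used in the pmix2x hunks follow the same lifecycle, with lkcbfunc() playing the role of example_cbfunc().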
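The pmix2x component additionally guards every south-side entry point with the framework lock and an initialization refcount before touching the PMIx library, returning OPAL_ERR_NOT_INITIALIZED otherwise (see pmix2x_server_gen_regex() and pmix2x_resolve_nodes() above). A condensed sketch of that guard, using a hypothetical entry-point name:

/* Sketch only: pmix2x_example_entry_point() is hypothetical; the lock, the
 * 'initialized' counter, and the OPAL_PMIX_*_THREAD macros are the ones this
 * patch adds to the pmix base framework. */
#include "opal/constants.h"
#include "opal/mca/pmix/base/base.h"   /* opal_pmix_base and OPAL_PMIX_*_THREAD */

int pmix2x_example_entry_point(void)
{
    OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
    if (0 >= opal_pmix_base.initialized) {
        /* init has not completed, or finalize already dropped the last ref -
         * fail fast instead of calling into PMIx */
        OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
        return OPAL_ERR_NOT_INITIALIZED;
    }
    OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);

    /* ... now safe to call into PMIx and convert the result ... */
    return OPAL_SUCCESS;
}

Entry points that touch shared framework state, such as the jobid trackers in pmix2x_resolve_peers(), re-acquire the same lock around the list manipulation and release it before returning.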