5252
5353extern ompi_rte_orte_component_t mca_rte_orte_component ;
5454
/*
 * Completion tracker passed as the cbdata of the errhandler
 * registration request; register_cbfunc() fills it in.
 */
typedef struct {
    volatile bool active;   /* stays true until register_cbfunc() has run */
    int status;             /* status value delivered to register_cbfunc() */
    int errhandler;         /* errhandler value delivered to register_cbfunc() */
} errhandler_t;
60+
61+ static void register_cbfunc (int status , int errhndler , void * cbdata )
62+ {
63+ errhandler_t * cd = (errhandler_t * )cbdata ;
64+ cd -> status = status ;
65+ cd -> errhandler = errhndler ;
66+ cd -> active = false;
67+ }
68+
/* Cleared by notify_cbfunc(); ompi_rte_wait_for_debugger() waits on it. */
static volatile bool wait_for_release = true;
/* Errhandler value saved by ompi_rte_init() for later deregistration;
 * -1 until a registration has succeeded. */
static int errhandler = -1;
71+
72+ static void notify_cbfunc (int status ,
73+ opal_list_t * procs ,
74+ opal_list_t * info ,
75+ opal_pmix_release_cbfunc_t cbfunc ,
76+ void * cbdata )
77+ {
78+ if (NULL != cbfunc ) {
79+ cbfunc (cbdata );
80+ }
81+ wait_for_release = false;
82+ }
83+
84+
85+ int ompi_rte_init (int * pargc , char * * * pargv )
86+ {
87+ int rc ;
88+ opal_list_t info ;
89+ opal_value_t val ;
90+ errhandler_t cd ;
91+
92+ if (ORTE_SUCCESS != (rc = orte_init (pargc , pargv , ORTE_PROC_MPI ))) {
93+ return rc ;
94+ }
95+
96+ if (!orte_standalone_operation ) {
97+ /* register to receive any debugger release */
98+ OBJ_CONSTRUCT (& info , opal_list_t );
99+ OBJ_CONSTRUCT (& val , opal_value_t );
100+ val .key = strdup (OPAL_PMIX_ERROR_NAME );
101+ val .type = OPAL_INT ;
102+ val .data .integer = OPAL_ERR_DEBUGGER_RELEASE ;
103+ opal_list_append (& info , & val .super );
104+ cd .status = ORTE_ERROR ;
105+ cd .errhandler = -1 ;
106+ cd .active = true;
107+
108+ opal_pmix .register_errhandler (& info , notify_cbfunc , register_cbfunc , & cd );
109+
110+ /* let the MPI progress engine run while we wait for
111+ * registration to complete */
112+ OMPI_WAIT_FOR_COMPLETION (cd .active );
113+ /* safely deconstruct the list */
114+ opal_list_remove_first (& info );
115+ OBJ_DESTRUCT (& val );
116+ OBJ_DESTRUCT (& info );
117+ if (OPAL_SUCCESS != cd .status ) {
118+ /* ouch - we are doomed */
119+ ORTE_ERROR_LOG (cd .status );
120+ return OMPI_ERROR ;
121+ }
122+ errhandler = cd .errhandler ;
123+ }
124+
125+ return OMPI_SUCCESS ;
126+ }
127+
55128void ompi_rte_abort (int error_code , char * fmt , ...)
56129{
57130 va_list arglist ;
@@ -100,10 +173,10 @@ void ompi_rte_abort(int error_code, char *fmt, ...)
100173 * attaching debuggers -- see big comment in
101174 * orte/tools/orterun/debuggers.c explaining the two scenarios.
102175 */
176+
103177void ompi_rte_wait_for_debugger (void )
104178{
105179 int debugger ;
106- orte_rml_recv_cb_t xfer ;
107180
108181 /* See lengthy comment in orte/tools/orterun/debuggers.c about
109182 orte_in_parallel_debugger */
@@ -117,12 +190,12 @@ void ompi_rte_wait_for_debugger(void)
117190 /* if not, just return */
118191 return ;
119192 }
120-
121193 /* if we are being debugged, then we need to find
122194 * the correct plug-ins
123195 */
124196 ompi_debugger_setup_dlls ();
125197
198+ /* wait for the debugger to attach */
126199 if (orte_standalone_operation ) {
127200 /* spin until debugger attaches and releases us */
128201 while (MPIR_debug_gate == 0 ) {
@@ -133,23 +206,9 @@ void ompi_rte_wait_for_debugger(void)
133206#endif
134207 }
135208 } else {
136- /* only the rank=0 proc waits for either a message from the
137- * HNP or for the debugger to attach - everyone else will just
138- * spin in * the grpcomm barrier in ompi_mpi_init until rank=0
139- * joins them.
140- */
141- if (0 != ORTE_PROC_MY_NAME -> vpid ) {
142- return ;
143- }
144-
145- /* VPID 0 waits for a message from the HNP */
146- OBJ_CONSTRUCT (& xfer , orte_rml_recv_cb_t );
147- xfer .active = true;
148- orte_rml .recv_buffer_nb (OMPI_NAME_WILDCARD ,
149- ORTE_RML_TAG_DEBUGGER_RELEASE ,
150- ORTE_RML_NON_PERSISTENT ,
151- orte_rml_recv_callback , & xfer );
152- /* let the MPI progress engine run while we wait */
153- OMPI_WAIT_FOR_COMPLETION (xfer .active );
209+ /* now wait for the notification to occur */
210+ OMPI_WAIT_FOR_COMPLETION (wait_for_release );
211+ /* deregister the errhandler */
212+ opal_pmix .deregister_errhandler (errhandler , NULL , NULL );
154213 }
155214}
0 commit comments