@@ -117,8 +117,14 @@ static void pipe_closed(int fd, short flags, void *arg);
117117static void rollup (int status , orte_process_name_t * sender ,
118118 opal_buffer_t * buffer ,
119119 orte_rml_tag_t tag , void * cbdata );
120+ static void node_regex_report (int status , orte_process_name_t * sender ,
121+ opal_buffer_t * buffer ,
122+ orte_rml_tag_t tag , void * cbdata );
123+ static void report_orted (void );
124+
120125static opal_buffer_t * bucket , * mybucket = NULL ;
121126static int ncollected = 0 ;
127+ static bool node_regex_waiting = false;
122128
123129static char * orte_parent_uri = NULL ;
124130
@@ -734,6 +740,11 @@ int orte_daemon(int argc, char *argv[])
734740 * a little time in the launch phase by "warming up" the
735741 * connection to our parent while we wait for our children */
736742 buffer = OBJ_NEW (opal_buffer_t ); // zero-byte message
743+ if (NULL == orte_node_regex ) {
744+ orte_rml .recv_buffer_nb (ORTE_PROC_MY_PARENT , ORTE_RML_TAG_NODE_REGEX_REPORT ,
745+ ORTE_RML_PERSISTENT , node_regex_report , & node_regex_waiting );
746+ node_regex_waiting = true;
747+ }
737748 if (0 > (ret = orte_rml .send_buffer_nb (orte_mgmt_conduit ,
738749 ORTE_PROC_MY_PARENT , buffer ,
739750 ORTE_RML_TAG_WARMUP_CONNECTION ,
@@ -969,8 +980,10 @@ int orte_daemon(int argc, char *argv[])
969980 i += 2 ;
970981 }
971982 }
972- /* now launch any child daemons of ours */
973- orte_plm .remote_spawn (orte_tree_launch_cmd );
983+ if (NULL != orte_node_regex ) {
984+ /* now launch any child daemons of ours */
985+ orte_plm .remote_spawn (orte_tree_launch_cmd );
986+ }
974987 }
975988
976989 if (orte_debug_daemons_flag ) {
@@ -1052,8 +1065,6 @@ static void rollup(int status, orte_process_name_t* sender,
10521065 opal_buffer_t * buffer ,
10531066 orte_rml_tag_t tag , void * cbdata )
10541067{
1055- int nreqd ;
1056- char * rtmod ;
10571068 int ret ;
10581069 orte_process_name_t child ;
10591070 int32_t i , flag , cnt ;
@@ -1095,10 +1106,17 @@ static void rollup(int status, orte_process_name_t* sender,
10951106 }
10961107
10971108 report :
1109+ report_orted ();
1110+ }
1111+
1112+ static void report_orted () {
1113+ char * rtmod ;
1114+ int nreqd , ret ;
1115+
10981116 /* get the number of children */
10991117 rtmod = orte_rml .get_routed (orte_mgmt_conduit );
11001118 nreqd = orte_routed .num_routes (rtmod ) + 1 ;
1101- if (nreqd == ncollected && NULL != mybucket ) {
1119+ if (nreqd == ncollected && NULL != mybucket && ! node_regex_waiting ) {
11021120 /* add the collection of our children's buckets to ours */
11031121 opal_dss .copy_payload (mybucket , bucket );
11041122 OBJ_RELEASE (bucket );
@@ -1112,3 +1130,36 @@ static void rollup(int status, orte_process_name_t* sender,
11121130 }
11131131 }
11141132}
1133+
1134+ static void node_regex_report (int status , orte_process_name_t * sender ,
1135+ opal_buffer_t * buffer ,
1136+ orte_rml_tag_t tag , void * cbdata ) {
1137+ int rc , n = 1 ;
1138+ char * regex ;
1139+ assert (NULL == orte_node_regex );
1140+ bool * active = (bool * )cbdata ;
1141+
1142+ /* extract the node regex if needed, and update the routing tree */
1143+ n = 1 ;
1144+ if (ORTE_SUCCESS != (rc = opal_dss .unpack (buffer , & regex , & n , OPAL_STRING ))) {
1145+ ORTE_ERROR_LOG (rc );
1146+ return ;
1147+ }
1148+ orte_node_regex = regex ;
1149+
1150+ if (ORTE_SUCCESS != (rc = orte_util_nidmap_parse (orte_node_regex ))) {
1151+ ORTE_ERROR_LOG (rc );
1152+ return ;
1153+ }
1154+
1155+ /* update the routing tree so any tree spawn operation
1156+ * properly gets the number of children underneath us */
1157+ orte_routed .update_routing_plan (NULL );
1158+
1159+ * active = false;
1160+
1161+ /* now launch any child daemons of ours */
1162+ orte_plm .remote_spawn (orte_tree_launch_cmd );
1163+
1164+ report_orted ();
1165+ }
0 commit comments