1111 *                         All rights reserved. 
1212 * Copyright (c) 2006      Sun Microsystems, Inc.  All rights reserved. 
1313 * Copyright (c) 2008-2009 Cisco Systems, Inc.  All rights reserved. 
14+  * Copyright (c) 2017      IBM Corporation.  All rights reserved. 
1415 * $COPYRIGHT$ 
1516 * 
1617 * Additional copyrights may follow 
2425#ifdef  HAVE_UNISTD_H 
2526#include  <unistd.h> 
2627#endif 
28+ #ifdef  HAVE_SYS_TYPES_H 
29+ #include  <sys/types.h> 
30+ #endif 
31+ #ifdef  HAVE_SYS_STAT_H 
32+ #include  <sys/stat.h> 
33+ #endif 
34+ #ifdef  HAVE_SYS_FCNTL_H 
35+ #include  <fcntl.h> 
36+ #endif 
2737
2838#include  <string.h> 
2939#include  <signal.h> 
3444#include  "opal/util/output.h" 
3545#include  "opal/util/show_help.h" 
3646#include  "opal/util/argv.h" 
47+ #include  "opal/util/proc.h" 
3748#include  "opal/runtime/opal_params.h" 
3849
3950#ifndef  _NSIG 
4253
4354#define  HOSTFORMAT  "[%s:%05d] "
4455
56+ int     opal_stacktrace_output_fileno  =  -1 ;
57+ static  char   * opal_stacktrace_output_filename_base  =  NULL ;
58+ static  size_t  opal_stacktrace_output_filename_max_len  =  0 ;
4559static  char  stacktrace_hostname [OPAL_MAXHOSTNAMELEN ];
4660static  char  * unable_to_print_msg  =  "Unable to print stack trace!\n" ;
4761
62+ /* 
63+  * Set the stacktrace filename: 
64+  * stacktrace.PID 
65+  * -or, if VPID is available- 
66+  * stacktrace.VPID.PID 
67+  */ 
68+ static  void  set_stacktrace_filename (void ) {
69+     opal_proc_t  * my_proc  =  opal_proc_local_get ();
70+ 
71+     if ( NULL  ==  my_proc  ) {
72+         snprintf (opal_stacktrace_output_filename , opal_stacktrace_output_filename_max_len ,
73+                  "%s.%lu" ,
74+                  opal_stacktrace_output_filename_base , (unsigned long )getpid ());
75+     }
76+     else  {
77+         snprintf (opal_stacktrace_output_filename , opal_stacktrace_output_filename_max_len ,
78+                  "%s.%lu.%lu" ,
79+                  opal_stacktrace_output_filename_base , (unsigned long )my_proc -> proc_name .vpid , (unsigned long )getpid ());
80+     }
81+ 
82+     return ;
83+ }
84+ 
4885/** 
4986 * This function is being called as a signal-handler in response 
5087 * to a user-specified signal (e.g. SIGFPE or SIGSEGV). 
@@ -68,12 +105,37 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
68105    int  ret ;
69106    char  * si_code_str  =  "" ;
70107
108+     /* Do not print the stack trace */ 
109+     if ( 0  >  opal_stacktrace_output_fileno  &&  0  ==  opal_stacktrace_output_filename_max_len  ) {
110+         /* Raise the signal again, so we don't accidentally mask critical signals. 
111+          * For critical signals, it is preferred that we call 'raise' instead of 
112+          * 'exit' or 'abort' so that the return status is set properly for this 
113+          * process. 
114+          */ 
115+         signal (signo , SIG_DFL );
116+         raise (signo );
117+ 
118+         return ;
119+     }
120+ 
121+     /* Update the file name with the RANK, if available */ 
122+     if ( 0  <  opal_stacktrace_output_filename_max_len  ) {
123+         set_stacktrace_filename ();
124+         opal_stacktrace_output_fileno  =  open (opal_stacktrace_output_filename ,
125+                                              O_CREAT |O_WRONLY |O_TRUNC , S_IRUSR |S_IWUSR );
126+         if ( 0  >  opal_stacktrace_output_fileno  ) {
127+             opal_output (0 , "Error: Failed to open the stacktrace output file. Default: stderr\n\tFilename: %s\n\tErrno: %s" ,
128+                         opal_stacktrace_output_filename , strerror (errno ));
129+             opal_stacktrace_output_fileno  =  fileno (stderr );
130+         }
131+     }
132+ 
71133    /* write out the footer information */ 
72134    memset  (print_buffer , 0 , sizeof  (print_buffer ));
73135    ret  =  snprintf (print_buffer , sizeof (print_buffer ),
74136                   HOSTFORMAT  "*** Process received signal ***\n" ,
75137                   stacktrace_hostname , getpid ());
76-     write (fileno ( stderr ) , print_buffer , ret );
138+     write (opal_stacktrace_output_fileno , print_buffer , ret );
77139
78140
79141    memset  (print_buffer , 0 , sizeof  (print_buffer ));
@@ -323,14 +385,14 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
323385    }
324386
325387    /* write out the signal information generated above */ 
326-     write (fileno ( stderr ) , print_buffer , sizeof (print_buffer )- size );
388+     write (opal_stacktrace_output_fileno , print_buffer , sizeof (print_buffer )- size );
327389
328390    /* print out the stack trace */ 
329391    snprintf (print_buffer , sizeof (print_buffer ), HOSTFORMAT ,
330392             stacktrace_hostname , getpid ());
331-     ret  =  opal_backtrace_print (stderr , print_buffer , 2 );
393+     ret  =  opal_backtrace_print (NULL , print_buffer , 2 );
332394    if  (OPAL_SUCCESS  !=  ret ) {
333-         write (fileno ( stderr ) , unable_to_print_msg , strlen (unable_to_print_msg ));
395+         write (opal_stacktrace_output_fileno , unable_to_print_msg , strlen (unable_to_print_msg ));
334396    }
335397
336398    /* write out the footer information */ 
@@ -339,9 +401,15 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
339401                   HOSTFORMAT  "*** End of error message ***\n" ,
340402                   stacktrace_hostname , getpid ());
341403    if  (ret  >  0 ) {
342-         write (fileno ( stderr ) , print_buffer , ret );
404+         write (opal_stacktrace_output_fileno , print_buffer , ret );
343405    } else  {
344-         write (fileno (stderr ), unable_to_print_msg , strlen (unable_to_print_msg ));
406+         write (opal_stacktrace_output_fileno , unable_to_print_msg , strlen (unable_to_print_msg ));
407+     }
408+ 
409+     if ( fileno (stdout ) !=  opal_stacktrace_output_fileno  && 
410+         fileno (stderr ) !=  opal_stacktrace_output_fileno  ) {
411+         close (opal_stacktrace_output_fileno );
412+         opal_stacktrace_output_fileno  =  -1 ;
345413    }
346414
347415    /* Raise the signal again, so we don't accidentally mask critical signals. 
@@ -372,7 +440,30 @@ void opal_stackframe_output(int stream)
372440            opal_output (stream , "%s" , traces [i ]);
373441        }
374442    } else  {
375-         opal_backtrace_print (stderr , NULL , 2 );
443+         /* Do not print the stack trace */ 
444+         if ( 0  >  opal_stacktrace_output_fileno  &&  0  ==  opal_stacktrace_output_filename_max_len  ) {
445+             return ;
446+         }
447+ 
448+         /* Update the file name with the RANK, if available */ 
449+         if ( 0  <  opal_stacktrace_output_filename_max_len  ) {
450+             set_stacktrace_filename ();
451+             opal_stacktrace_output_fileno  =  open (opal_stacktrace_output_filename ,
452+                                                  O_CREAT |O_WRONLY |O_TRUNC , S_IRUSR |S_IWUSR );
453+             if ( 0  >  opal_stacktrace_output_fileno  ) {
454+                 opal_output (0 , "Error: Failed to open the stacktrace output file. Default: stderr\n\tFilename: %s\n\tErrno: %s" ,
455+                             opal_stacktrace_output_filename , strerror (errno ));
456+                 opal_stacktrace_output_fileno  =  fileno (stderr );
457+             }
458+         }
459+ 
460+         opal_backtrace_print (NULL , NULL , 2 );
461+ 
462+         if ( fileno (stdout ) !=  opal_stacktrace_output_fileno  && 
463+             fileno (stderr ) !=  opal_stacktrace_output_fileno  ) {
464+             close (opal_stacktrace_output_fileno );
465+             opal_stacktrace_output_fileno  =  -1 ;
466+         }
376467    }
377468}
378469
@@ -443,6 +534,50 @@ int opal_util_register_stackhandlers (void)
443534        }
444535    }
445536
537+     /* Setup the output stream to use */ 
538+     if ( NULL  ==  opal_stacktrace_output_filename  || 
539+         0  ==  strcasecmp (opal_stacktrace_output_filename , "none" ) ) {
540+         opal_stacktrace_output_fileno  =  -1 ;
541+     }
542+     else  if ( 0  ==  strcasecmp (opal_stacktrace_output_filename , "stdout" ) ) {
543+         opal_stacktrace_output_fileno  =  fileno (stdout );
544+     }
545+     else  if ( 0  ==  strcasecmp (opal_stacktrace_output_filename , "stderr" ) ) {
546+         opal_stacktrace_output_fileno  =  fileno (stdout );
547+     }
548+     else  if ( 0  ==  strcasecmp (opal_stacktrace_output_filename , "file"  ) || 
549+              0  ==  strcasecmp (opal_stacktrace_output_filename , "file:" ) ) {
550+         opal_stacktrace_output_filename_base  =  strdup ("stacktrace" );
551+ 
552+         free (opal_stacktrace_output_filename );
553+         // Magic number: 8 = space for .PID and .RANK (allow 7 digits each) 
554+         opal_stacktrace_output_filename_max_len  =  strlen ("stacktrace" ) +  8  +  8 ;
555+         opal_stacktrace_output_filename  =  (char * )malloc (sizeof (char ) *  opal_stacktrace_output_filename_max_len );
556+         set_stacktrace_filename ();
557+         opal_stacktrace_output_fileno  =  -1 ;
558+     }
559+     else  if ( 0  ==  strncasecmp (opal_stacktrace_output_filename , "file:" , 5 ) ) {
560+         char  * filename_cpy  =  NULL ;
561+         next  =  strchr (opal_stacktrace_output_filename , ':' );
562+         next ++ ; // move past the ':' to the filename specified 
563+ 
564+         opal_stacktrace_output_filename_base  =  strdup (next );
565+ 
566+         free (opal_stacktrace_output_filename );
567+         // Magic number: 8 = space for .PID and .RANK (allow 7 digits each) 
568+         opal_stacktrace_output_filename_max_len  =  strlen (opal_stacktrace_output_filename_base ) +  8  +  8 ;
569+         opal_stacktrace_output_filename  =  (char * )malloc (sizeof (char ) *  opal_stacktrace_output_filename_max_len );
570+         set_stacktrace_filename ();
571+         opal_stacktrace_output_fileno  =  -1 ;
572+ 
573+         free (filename_cpy );
574+     }
575+     else  {
576+         opal_stacktrace_output_fileno  =  fileno (stderr );
577+     }
578+ 
579+ 
580+     /* Setup the signals to catch */ 
446581    memset (& act , 0 , sizeof (act ));
447582    act .sa_sigaction  =  show_stackframe ;
448583    act .sa_flags  =  SA_SIGINFO ;
0 commit comments