Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions opal/mca/backtrace/backtrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
* All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -26,6 +27,7 @@

#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/util/stacktrace.h"

BEGIN_C_DECLS

Expand All @@ -39,6 +41,8 @@ BEGIN_C_DECLS
/*
* Print a back trace to the FILE 'file', with 'prefix' prepended to each line.
* The first 'strip' lines are not printed.
* If 'file' is NULL then the component should try to use the file descriptor
* saved in opal_stacktrace_output_fileno
*
* \note some attempts made to be signal safe.
*/
Expand Down
9 changes: 7 additions & 2 deletions opal/mca/backtrace/execinfo/backtrace_execinfo.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -34,12 +35,16 @@
int
opal_backtrace_print(FILE *file, char *prefix, int strip)
{
int i, fd, len;
int i, len;
int trace_size;
void * trace[32];
char buf[6];
int fd = opal_stacktrace_output_fileno;

if( NULL != file ) {
fd = fileno(file);
}

fd = fileno (file);
if (-1 == fd) {
return OPAL_ERR_BAD_PARAM;
}
Expand Down
9 changes: 8 additions & 1 deletion opal/mca/backtrace/printstack/backtrace_printstack.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -27,7 +28,13 @@
/*
 * Print the call stack of the current process using printstack().
 *
 * file:   stream whose descriptor receives the output; when NULL the
 *         descriptor saved in opal_stacktrace_output_fileno is used instead.
 * prefix: not used by this component.
 * strip:  not used by this component.
 *
 * Always returns OPAL_SUCCESS.
 */
int
opal_backtrace_print(FILE *file, char *prefix, int strip)
{
    /* Default to the globally configured descriptor; callers pass
     * file == NULL to request it, so fileno() must only be applied to a
     * non-NULL stream. */
    int fd = opal_stacktrace_output_fileno;

    if( NULL != file ) {
        fd = fileno(file);
    }

    printstack(fd);

    return OPAL_SUCCESS;
}
Expand Down
26 changes: 25 additions & 1 deletion opal/runtime/opal_params.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -48,6 +49,7 @@
#include "opal/util/timings.h"

char *opal_signal_string = NULL;
char *opal_stacktrace_output_filename = NULL;
char *opal_net_private_ipv4 = NULL;
char *opal_set_max_sys_limits = NULL;

Expand Down Expand Up @@ -76,6 +78,7 @@ static bool opal_register_done = false;
int opal_register_params(void)
{
int ret;
char *string = NULL;

if (opal_register_done) {
return OPAL_SUCCESS;
Expand All @@ -87,7 +90,6 @@ int opal_register_params(void)
* This string is going to be used in opal/util/stacktrace.c
*/
{
char *string = NULL;
int j;
int signals[] = {
#ifdef SIGABRT
Expand Down Expand Up @@ -127,6 +129,28 @@ int opal_register_params(void)
}
}

/*
* Where should the stack trace output be directed
* This string is going to be used in opal/util/stacktrace.c
*/
string = strdup("stderr");
opal_stacktrace_output_filename = string;
ret = mca_base_var_register ("opal", "opal", NULL, "stacktrace_output",
"Specifies where the stack trace output stream goes. "
"Accepts one of the following: none (disabled), stderr (default), stdout, file[:filename]. "
"If 'filename' is not specified, a default filename of 'stacktrace' is used. "
"The 'filename' is appended with either '.PID' or '.RANK.PID', if RANK is available. "
"The 'filename' can be an absolute path or a relative path to the current working directory.",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL,
&opal_stacktrace_output_filename);
free (string);
if (0 > ret) {
return ret;
}


#if defined(HAVE_SCHED_YIELD)
opal_progress_yield_when_idle = false;
ret = mca_base_var_register ("opal", "opal", "progress", "yield_when_idle",
Expand Down
2 changes: 2 additions & 0 deletions opal/runtime/opal_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
* Copyright (c) 2014 Hochschule Esslingen. All rights reserved.
* Copyright (c) 2015 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -29,6 +30,7 @@
#define OPAL_PARAMS_H

extern char *opal_signal_string;
extern char *opal_stacktrace_output_filename;
extern char *opal_net_private_ipv4;
extern char *opal_set_max_sys_limits;

Expand Down
157 changes: 150 additions & 7 deletions opal/util/stacktrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
* All rights reserved.
* Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -24,6 +25,15 @@
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_SYS_FCNTL_H
#include <fcntl.h>
#endif

#include <string.h>
#include <signal.h>
Expand All @@ -34,6 +44,7 @@
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "opal/util/argv.h"
#include "opal/util/proc.h"
#include "opal/runtime/opal_params.h"

#ifndef _NSIG
Expand All @@ -42,9 +53,35 @@

#define HOSTFORMAT "[%s:%05d] "

int opal_stacktrace_output_fileno = -1;
static char *opal_stacktrace_output_filename_base = NULL;
static size_t opal_stacktrace_output_filename_max_len = 0;
static char stacktrace_hostname[OPAL_MAXHOSTNAMELEN];
static char *unable_to_print_msg = "Unable to print stack trace!\n";

/*
 * Format the stack trace output filename into
 * opal_stacktrace_output_filename (at most
 * opal_stacktrace_output_filename_max_len bytes are written):
 *     <base>.PID          when no local process object is available
 *     <base>.VPID.PID     otherwise
 * where <base> is opal_stacktrace_output_filename_base.
 */
static void set_stacktrace_filename(void)
{
    opal_proc_t *local_proc = opal_proc_local_get();

    if( NULL != local_proc ) {
        /* The local process is known: include its vpid ahead of the PID */
        snprintf(opal_stacktrace_output_filename,
                 opal_stacktrace_output_filename_max_len,
                 "%s.%lu.%lu",
                 opal_stacktrace_output_filename_base,
                 (unsigned long)local_proc->proc_name.vpid,
                 (unsigned long)getpid());
        return;
    }

    /* No local process information: the PID alone distinguishes the file */
    snprintf(opal_stacktrace_output_filename,
             opal_stacktrace_output_filename_max_len,
             "%s.%lu",
             opal_stacktrace_output_filename_base,
             (unsigned long)getpid());
}

/**
* This function is being called as a signal-handler in response
* to a user-specified signal (e.g. SIGFPE or SIGSEGV).
Expand All @@ -68,12 +105,37 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
int ret;
char *si_code_str = "";

/* Do not print the stack trace */
if( 0 > opal_stacktrace_output_fileno && 0 == opal_stacktrace_output_filename_max_len ) {
/* Raise the signal again, so we don't accidentally mask critical signals.
* For critical signals, it is preferred that we call 'raise' instead of
* 'exit' or 'abort' so that the return status is set properly for this
* process.
*/
signal(signo, SIG_DFL);
raise(signo);

return;
}

/* Update the file name with the RANK, if available */
if( 0 < opal_stacktrace_output_filename_max_len ) {
set_stacktrace_filename();
opal_stacktrace_output_fileno = open(opal_stacktrace_output_filename,
O_CREAT|O_WRONLY|O_TRUNC, S_IRUSR|S_IWUSR);
if( 0 > opal_stacktrace_output_fileno ) {
opal_output(0, "Error: Failed to open the stacktrace output file. Default: stderr\n\tFilename: %s\n\tErrno: %s",
opal_stacktrace_output_filename, strerror(errno));
opal_stacktrace_output_fileno = fileno(stderr);
}
}

/* write out the header information */
memset (print_buffer, 0, sizeof (print_buffer));
ret = snprintf(print_buffer, sizeof(print_buffer),
HOSTFORMAT "*** Process received signal ***\n",
stacktrace_hostname, getpid());
write(fileno(stderr), print_buffer, ret);
write(opal_stacktrace_output_fileno, print_buffer, ret);


memset (print_buffer, 0, sizeof (print_buffer));
Expand Down Expand Up @@ -323,14 +385,14 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
}

/* write out the signal information generated above */
write(fileno(stderr), print_buffer, sizeof(print_buffer)-size);
write(opal_stacktrace_output_fileno, print_buffer, sizeof(print_buffer)-size);

/* print out the stack trace */
snprintf(print_buffer, sizeof(print_buffer), HOSTFORMAT,
stacktrace_hostname, getpid());
ret = opal_backtrace_print(stderr, print_buffer, 2);
ret = opal_backtrace_print(NULL, print_buffer, 2);
if (OPAL_SUCCESS != ret) {
write(fileno(stderr), unable_to_print_msg, strlen(unable_to_print_msg));
write(opal_stacktrace_output_fileno, unable_to_print_msg, strlen(unable_to_print_msg));
}

/* write out the footer information */
Expand All @@ -339,10 +401,24 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
HOSTFORMAT "*** End of error message ***\n",
stacktrace_hostname, getpid());
if (ret > 0) {
write(fileno(stderr), print_buffer, ret);
write(opal_stacktrace_output_fileno, print_buffer, ret);
} else {
write(fileno(stderr), unable_to_print_msg, strlen(unable_to_print_msg));
write(opal_stacktrace_output_fileno, unable_to_print_msg, strlen(unable_to_print_msg));
}

if( fileno(stdout) != opal_stacktrace_output_fileno &&
fileno(stderr) != opal_stacktrace_output_fileno ) {
close(opal_stacktrace_output_fileno);
opal_stacktrace_output_fileno = -1;
}

/* Raise the signal again, so we don't accidentally mask critical signals.
* For critical signals, it is preferred that we call 'raise' instead of
* 'exit' or 'abort' so that the return status is set properly for this
* process.
*/
signal(signo, SIG_DFL);
raise(signo);
}

#endif /* OPAL_WANT_PRETTY_PRINT_STACKTRACE */
Expand All @@ -364,7 +440,30 @@ void opal_stackframe_output(int stream)
opal_output(stream, "%s", traces[i]);
}
} else {
opal_backtrace_print(stderr, NULL, 2);
/* Do not print the stack trace */
if( 0 > opal_stacktrace_output_fileno && 0 == opal_stacktrace_output_filename_max_len ) {
return;
}

/* Update the file name with the RANK, if available */
if( 0 < opal_stacktrace_output_filename_max_len ) {
set_stacktrace_filename();
opal_stacktrace_output_fileno = open(opal_stacktrace_output_filename,
O_CREAT|O_WRONLY|O_TRUNC, S_IRUSR|S_IWUSR);
if( 0 > opal_stacktrace_output_fileno ) {
opal_output(0, "Error: Failed to open the stacktrace output file. Default: stderr\n\tFilename: %s\n\tErrno: %s",
opal_stacktrace_output_filename, strerror(errno));
opal_stacktrace_output_fileno = fileno(stderr);
}
}

opal_backtrace_print(NULL, NULL, 2);

if( fileno(stdout) != opal_stacktrace_output_fileno &&
fileno(stderr) != opal_stacktrace_output_fileno ) {
close(opal_stacktrace_output_fileno);
opal_stacktrace_output_fileno = -1;
}
}
}

Expand Down Expand Up @@ -435,6 +534,50 @@ int opal_util_register_stackhandlers (void)
}
}

/* Setup the output stream to use.
 *
 * opal_stacktrace_output_filename is interpreted (case-insensitively) as:
 *   NULL / "none"     -> no output (descriptor stays -1)
 *   "stdout"          -> descriptor of stdout
 *   "stderr"          -> descriptor of stderr
 *   "file" / "file:"  -> per-process file using the base name "stacktrace"
 *   "file:<name>"     -> per-process file using <name> as the base name
 *   anything else     -> descriptor of stderr
 *
 * For the file cases the descriptor is left at -1 here and a non-zero
 * opal_stacktrace_output_filename_max_len marks that a file is to be used.
 */
if( NULL == opal_stacktrace_output_filename ||
    0 == strcasecmp(opal_stacktrace_output_filename, "none") ) {
    opal_stacktrace_output_fileno = -1;
}
else if( 0 == strcasecmp(opal_stacktrace_output_filename, "stdout") ) {
    opal_stacktrace_output_fileno = fileno(stdout);
}
else if( 0 == strcasecmp(opal_stacktrace_output_filename, "stderr") ) {
    opal_stacktrace_output_fileno = fileno(stderr);
}
else if( 0 == strcasecmp(opal_stacktrace_output_filename, "file" ) ||
         0 == strcasecmp(opal_stacktrace_output_filename, "file:") ) {
    opal_stacktrace_output_filename_base = strdup("stacktrace");

    free(opal_stacktrace_output_filename);
    // Magic number: 8 = space for .PID and .RANK (allow 7 digits each),
    // plus 1 for the terminating NUL so a full-width name is not truncated
    opal_stacktrace_output_filename_max_len = strlen("stacktrace") + 8 + 8 + 1;
    opal_stacktrace_output_filename = (char*)malloc(sizeof(char) * opal_stacktrace_output_filename_max_len);
    set_stacktrace_filename();
    opal_stacktrace_output_fileno = -1;
}
else if( 0 == strncasecmp(opal_stacktrace_output_filename, "file:", 5) ) {
    next = strchr(opal_stacktrace_output_filename, ':');
    next++; // move past the ':' to the filename specified

    // Copy the base name before the original string is released below
    opal_stacktrace_output_filename_base = strdup(next);

    free(opal_stacktrace_output_filename);
    // Magic number: 8 = space for .PID and .RANK (allow 7 digits each),
    // plus 1 for the terminating NUL so a full-width name is not truncated
    opal_stacktrace_output_filename_max_len = strlen(opal_stacktrace_output_filename_base) + 8 + 8 + 1;
    opal_stacktrace_output_filename = (char*)malloc(sizeof(char) * opal_stacktrace_output_filename_max_len);
    set_stacktrace_filename();
    opal_stacktrace_output_fileno = -1;
}
else {
    // Unrecognized value: fall back to stderr
    opal_stacktrace_output_fileno = fileno(stderr);
}


/* Setup the signals to catch */
memset(&act, 0, sizeof(act));
act.sa_sigaction = show_stackframe;
act.sa_flags = SA_SIGINFO;
Expand Down
Loading