From eabe3e382af4b8e8f4134273830a53ef26ffa3d0 Mon Sep 17 00:00:00 2001 From: tstuefe Date: Fri, 23 Oct 2020 16:11:47 +0200 Subject: [PATCH 1/3] Initial patch --- src/hotspot/os/posix/signals_posix.cpp | 65 ++++++-------------------- src/hotspot/os/posix/signals_posix.hpp | 7 ++- src/hotspot/os/posix/vmError_posix.cpp | 12 +---- 3 files changed, 22 insertions(+), 62 deletions(-) diff --git a/src/hotspot/os/posix/signals_posix.cpp b/src/hotspot/os/posix/signals_posix.cpp index 4eea0664566e5..5959cde01c198 100644 --- a/src/hotspot/os/posix/signals_posix.cpp +++ b/src/hotspot/os/posix/signals_posix.cpp @@ -451,46 +451,33 @@ extern "C" JNIEXPORT int JVM_handle_linux_signal(int signo, siginfo_t* siginfo, int abort_if_unrecognized); #endif -#if defined(AIX) - -// Set thread signal mask (for some reason on AIX sigthreadmask() seems -// to be the thing to call; documentation is not terribly clear about whether -// pthread_sigmask also works, and if it does, whether it does the same. -bool set_thread_signal_mask(int how, const sigset_t* set, sigset_t* oset) { - const int rc = ::pthread_sigmask(how, set, oset); - // return value semantics differ slightly for error case: - // pthread_sigmask returns error number, sigthreadmask -1 and sets global errno - // (so, pthread_sigmask is more theadsafe for error handling) - // But success is always 0. - return rc == 0 ? true : false; -} - -// Function to unblock all signals which are, according -// to POSIX, typical program error signals. If they happen while being blocked, -// they typically will bring down the process immediately. -bool unblock_program_error_signals() { +// Unblock all signals whose delivery cannot be deferred and which, if they happen +// while delivery is blocked, would cause crashes or hangs (JDK-8252533). +void PosixSignals::unblock_error_signals() { sigset_t set; sigemptyset(&set); sigaddset(&set, SIGILL); sigaddset(&set, SIGBUS); sigaddset(&set, SIGFPE); sigaddset(&set, SIGSEGV); - return set_thread_signal_mask(SIG_UNBLOCK, &set, NULL); -} - + // We also unblock SIGTRAP, which is not in the list of POSIX lists of signals + // which would cause undefined behavior when blocked + // (https://pubs.opengroup.org/onlinepubs/009695399/functions/sigprocmask.html) + // but in our experience cause the same problems when occurring when blocked. + sigaddset(&set, SIGTRAP); + int rc = ::pthread_sigmask(SIG_UNBLOCK, &set, NULL); +#ifdef ASSERT + if (rc != 0) { + log_warning(os)("pthread_sigmask failed (%d)", errno); + } #endif +} // Renamed from 'signalHandler' to avoid collision with other shared libs. static void javaSignalHandler(int sig, siginfo_t* info, void* uc) { assert(info != NULL && uc != NULL, "it must be old kernel"); -// TODO: reconcile the differences between Linux/BSD vs AIX here! -#if defined(AIX) - // Never leave program error signals blocked; - // on all our platforms they would bring down the process immediately when - // getting raised while being blocked. - unblock_program_error_signals(); -#endif + PosixSignals::unblock_error_signals(); int orig_errno = errno; // Preserve errno value over signal handler. #if defined(BSD) @@ -702,24 +689,6 @@ void* os::signal(int signal_number, void* handler) { struct sigaction sigAct, oldSigAct; sigfillset(&(sigAct.sa_mask)); - -#if defined(AIX) - // Do not block out synchronous signals in the signal handler. - // Blocking synchronous signals only makes sense if you can really - // be sure that those signals won't happen during signal handling, - // when the blocking applies. Normal signal handlers are lean and - // do not cause signals. But our signal handlers tend to be "risky" - // - secondary SIGSEGV, SIGILL, SIGBUS' may and do happen. - // On AIX, PASE there was a case where a SIGSEGV happened, followed - // by a SIGILL, which was blocked due to the signal mask. The process - // just hung forever. Better to crash from a secondary signal than to hang. - sigdelset(&(sigAct.sa_mask), SIGSEGV); - sigdelset(&(sigAct.sa_mask), SIGBUS); - sigdelset(&(sigAct.sa_mask), SIGILL); - sigdelset(&(sigAct.sa_mask), SIGFPE); - sigdelset(&(sigAct.sa_mask), SIGTRAP); -#endif - sigAct.sa_flags = SA_RESTART|SA_SIGINFO; sigAct.sa_handler = CAST_TO_FN_PTR(sa_handler_t, handler); @@ -1303,10 +1272,6 @@ bool PosixSignals::is_sig_ignored(int sig) { } } -int PosixSignals::unblock_thread_signal_mask(const sigset_t *set) { - return pthread_sigmask(SIG_UNBLOCK, set, NULL); -} - address PosixSignals::ucontext_get_pc(const ucontext_t* ctx) { #if defined(AIX) return os::Aix::ucontext_get_pc(ctx); diff --git a/src/hotspot/os/posix/signals_posix.hpp b/src/hotspot/os/posix/signals_posix.hpp index a2f7d955e1461..7194bd6c57397 100644 --- a/src/hotspot/os/posix/signals_posix.hpp +++ b/src/hotspot/os/posix/signals_posix.hpp @@ -44,8 +44,6 @@ class PosixSignals : public AllStatic { static bool is_sig_ignored(int sig); static void signal_sets_init(); - // unblocks the signal masks for current thread - static int unblock_thread_signal_mask(const sigset_t *set); static void hotspot_sigmask(Thread* thread); static void print_signal_handler(outputStream* st, int sig, char* buf, size_t buflen); @@ -64,6 +62,11 @@ class PosixSignals : public AllStatic { // sun.misc.Signal support static void jdk_misc_signal_init(); + + // Unblock all signals whose delivery cannot be deferred and which, if they happen + // while delivery is blocked, would cause crashes or hangs (see JDK-8252533). + static void unblock_error_signals(); + }; #endif // OS_POSIX_SIGNALS_POSIX_HPP diff --git a/src/hotspot/os/posix/vmError_posix.cpp b/src/hotspot/os/posix/vmError_posix.cpp index 9c83d263e7184..0e227deb39f5c 100644 --- a/src/hotspot/os/posix/vmError_posix.cpp +++ b/src/hotspot/os/posix/vmError_posix.cpp @@ -101,15 +101,8 @@ address VMError::get_resetted_sighandler(int sig) { } static void crash_handler(int sig, siginfo_t* info, void* ucVoid) { - // unmask current signal - sigset_t newset; - sigemptyset(&newset); - sigaddset(&newset, sig); - // also unmask other synchronous signals - for (int i = 0; i < NUM_SIGNALS; i++) { - sigaddset(&newset, SIGNALS[i]); - } - PosixSignals::unblock_thread_signal_mask(&newset); + + PosixSignals::unblock_error_signals(); // support safefetch faults in error handling ucontext_t* const uc = (ucontext_t*) ucVoid; @@ -148,7 +141,6 @@ void VMError::reset_signal_handlers() { os::signal(SIGNALS[i], CAST_FROM_FN_PTR(void *, crash_handler)); sigaddset(&newset, SIGNALS[i]); } - PosixSignals::unblock_thread_signal_mask(&newset); } // Write a hint to the stream in case siginfo relates to a segv/bus error From 8d86e25a89e7e7f7af463a6bc9a58cb57fb63d05 Mon Sep 17 00:00:00 2001 From: tstuefe Date: Thu, 29 Oct 2020 09:14:36 +0100 Subject: [PATCH 2/3] Feedback David --- src/hotspot/os/posix/signals_posix.cpp | 71 ++++++++++++++++++++------ src/hotspot/os/posix/vmError_posix.cpp | 5 -- 2 files changed, 56 insertions(+), 20 deletions(-) diff --git a/src/hotspot/os/posix/signals_posix.cpp b/src/hotspot/os/posix/signals_posix.cpp index 5959cde01c198..614e3e106b5b7 100644 --- a/src/hotspot/os/posix/signals_posix.cpp +++ b/src/hotspot/os/posix/signals_posix.cpp @@ -451,26 +451,57 @@ extern "C" JNIEXPORT int JVM_handle_linux_signal(int signo, siginfo_t* siginfo, int abort_if_unrecognized); #endif + +///// Synchronous (non-deferrable) error signals (ILL, SEGV, FPE, BUS, TRAP): + +// These signals are special because they cannot be deferred and, if they +// happen while delivery is blocked for the receiving thread, will cause UB +// (in practice typically resulting in sudden process deaths or hangs, see +// JDK-8252533). So we must take care never to block them when we cannot be +// absolutely sure they won't happen. In practice, this is always. +// +// Relevant Posix quote: +// +// The behavior of a process is undefined after it ignores a SIGFPE, SIGILL, +// SIGSEGV, or SIGBUS signal that was not generated by kill(), sigqueue(), or +// raise(). +// +// +// We also include SIGTRAP in that list of never-to-block-signals. While not +// mentioned by the Posix documentation, in our (SAPs) experience blocking it +// causes similar problems. Beside, during normal operation - outside of error +// handling - SIGTRAP may be used for implicit NULL checking, so it makes sense +// to never block it. +// +// We deal with those signals in two ways: +// - we just never explicitly block them, which includes not accidentally blocking +// them via sa_mask when establishing signal handlers. +// - as an additional safety measure, at the entrance of a signal handler, we +// unblock them explicitly. + +static void add_error_signals_to_set(sigset_t* set) { + sigaddset(set, SIGILL); + sigaddset(set, SIGBUS); + sigaddset(set, SIGFPE); + sigaddset(set, SIGSEGV); + sigaddset(set, SIGTRAP); +} + +static void remove_error_signals_from_set(sigset_t* set) { + sigdelset(set, SIGILL); + sigdelset(set, SIGBUS); + sigdelset(set, SIGFPE); + sigdelset(set, SIGSEGV); + sigdelset(set, SIGTRAP); +} + // Unblock all signals whose delivery cannot be deferred and which, if they happen // while delivery is blocked, would cause crashes or hangs (JDK-8252533). void PosixSignals::unblock_error_signals() { sigset_t set; sigemptyset(&set); - sigaddset(&set, SIGILL); - sigaddset(&set, SIGBUS); - sigaddset(&set, SIGFPE); - sigaddset(&set, SIGSEGV); - // We also unblock SIGTRAP, which is not in the list of POSIX lists of signals - // which would cause undefined behavior when blocked - // (https://pubs.opengroup.org/onlinepubs/009695399/functions/sigprocmask.html) - // but in our experience cause the same problems when occurring when blocked. - sigaddset(&set, SIGTRAP); - int rc = ::pthread_sigmask(SIG_UNBLOCK, &set, NULL); -#ifdef ASSERT - if (rc != 0) { - log_warning(os)("pthread_sigmask failed (%d)", errno); - } -#endif + add_error_signals_to_set(&set); + ::pthread_sigmask(SIG_UNBLOCK, &set, NULL); } // Renamed from 'signalHandler' to avoid collision with other shared libs. @@ -491,6 +522,9 @@ static void javaSignalHandler(int sig, siginfo_t* info, void* uc) { } static void UserHandler(int sig, void *siginfo, void *context) { + + PosixSignals::unblock_error_signals(); + // Ctrl-C is pressed during error reporting, likely because the error // handler fails to abort. Let VM die immediately. if (sig == SIGINT && VMError::is_error_reported()) { @@ -689,6 +723,8 @@ void* os::signal(int signal_number, void* handler) { struct sigaction sigAct, oldSigAct; sigfillset(&(sigAct.sa_mask)); + remove_error_signals_from_set(&(sigAct.sa_mask)); + sigAct.sa_flags = SA_RESTART|SA_SIGINFO; sigAct.sa_handler = CAST_TO_FN_PTR(sa_handler_t, handler); @@ -1068,6 +1104,7 @@ void set_signal_handler(int sig, bool set_installed) { struct sigaction sigAct; sigfillset(&(sigAct.sa_mask)); + remove_error_signals_from_set(&(sigAct.sa_mask)); sigAct.sa_handler = SIG_DFL; if (!set_installed) { sigAct.sa_flags = SA_SIGINFO|SA_RESTART; @@ -1435,10 +1472,13 @@ static void suspend_save_context(OSThread *osthread, siginfo_t* siginfo, ucontex // Currently only ever called on the VMThread and JavaThreads (PC sampling) // static void SR_handler(int sig, siginfo_t* siginfo, ucontext_t* context) { + // Save and restore errno to avoid confusing native code with EINTR // after sigsuspend. int old_errno = errno; + PosixSignals::unblock_error_signals(); + Thread* thread = Thread::current_or_null_safe(); assert(thread != NULL, "Missing current thread in SR_handler"); @@ -1532,6 +1572,7 @@ int PosixSignals::SR_initialize() { // SR_signum is blocked by default. pthread_sigmask(SIG_BLOCK, NULL, &act.sa_mask); + remove_error_signals_from_set(&(act.sa_mask)); if (sigaction(SR_signum, &act, 0) == -1) { return -1; diff --git a/src/hotspot/os/posix/vmError_posix.cpp b/src/hotspot/os/posix/vmError_posix.cpp index 0e227deb39f5c..bde46e2874179 100644 --- a/src/hotspot/os/posix/vmError_posix.cpp +++ b/src/hotspot/os/posix/vmError_posix.cpp @@ -132,14 +132,9 @@ static void crash_handler(int sig, siginfo_t* info, void* ucVoid) { } void VMError::reset_signal_handlers() { - // install signal handlers for all synchronous program error signals - sigset_t newset; - sigemptyset(&newset); - for (int i = 0; i < NUM_SIGNALS; i++) { save_signal(i, SIGNALS[i]); os::signal(SIGNALS[i], CAST_FROM_FN_PTR(void *, crash_handler)); - sigaddset(&newset, SIGNALS[i]); } } From d9cbdb6016555b6483047f97019396aea9f62928 Mon Sep 17 00:00:00 2001 From: tstuefe Date: Tue, 3 Nov 2020 07:45:31 +0100 Subject: [PATCH 3/3] last nits (comments only) ironed out --- src/hotspot/os/posix/signals_posix.cpp | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/hotspot/os/posix/signals_posix.cpp b/src/hotspot/os/posix/signals_posix.cpp index 614e3e106b5b7..6b248a969813d 100644 --- a/src/hotspot/os/posix/signals_posix.cpp +++ b/src/hotspot/os/posix/signals_posix.cpp @@ -455,23 +455,21 @@ extern "C" JNIEXPORT int JVM_handle_linux_signal(int signo, siginfo_t* siginfo, ///// Synchronous (non-deferrable) error signals (ILL, SEGV, FPE, BUS, TRAP): // These signals are special because they cannot be deferred and, if they -// happen while delivery is blocked for the receiving thread, will cause UB -// (in practice typically resulting in sudden process deaths or hangs, see -// JDK-8252533). So we must take care never to block them when we cannot be -// absolutely sure they won't happen. In practice, this is always. +// happen while delivery is blocked for the receiving thread, will cause UB +// (in practice typically resulting in sudden process deaths or hangs, see +// JDK-8252533). So we must take care never to block them when we cannot be +// absolutely sure they won't happen. In practice, this is always. // // Relevant Posix quote: -// -// The behavior of a process is undefined after it ignores a SIGFPE, SIGILL, -// SIGSEGV, or SIGBUS signal that was not generated by kill(), sigqueue(), or -// raise(). -// +// "The behavior of a process is undefined after it ignores a SIGFPE, SIGILL, +// SIGSEGV, or SIGBUS signal that was not generated by kill(), sigqueue(), or +// raise()." // // We also include SIGTRAP in that list of never-to-block-signals. While not -// mentioned by the Posix documentation, in our (SAPs) experience blocking it -// causes similar problems. Beside, during normal operation - outside of error -// handling - SIGTRAP may be used for implicit NULL checking, so it makes sense -// to never block it. +// mentioned by the Posix documentation, in our (SAPs) experience blocking it +// causes similar problems. Beside, during normal operation - outside of error +// handling - SIGTRAP may be used for implicit NULL checking, so it makes sense +// to never block it. // // We deal with those signals in two ways: // - we just never explicitly block them, which includes not accidentally blocking