From 08465b43ae48b3fdd9664d370b1aaca5b15290fa Mon Sep 17 00:00:00 2001 From: Yichao Yu Date: Sat, 30 Jul 2016 08:04:26 +0800 Subject: [PATCH 1/2] Add more `try`-`catch` and sigatomic for top-level code/new tasks So that we don't need to run `jl_exit` in strange (signal handler) context due to missing exception handler. --- base/task.jl | 3 +++ src/julia.h | 1 + src/task.c | 22 ++++++++++++++++------ ui/repl.c | 7 ++++++- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/base/task.jl b/base/task.jl index cc06115a23554..3f3fc253e7338 100644 --- a/base/task.jl +++ b/base/task.jl @@ -141,6 +141,7 @@ suppress_excp_printing(t::Task) = isa(t.storage, ObjectIdDict) ? get(get_task_tl # runtime system hook called when a task finishes function task_done_hook(t::Task) + # `finish_task` sets `sigatomic` before entering this function err = (t.state == :failed) result = t.result handled = false @@ -188,6 +189,8 @@ function task_done_hook(t::Task) end end end + # Clear sigatomic before waiting + sigatomic_end() wait() end diff --git a/src/julia.h b/src/julia.h index 3e3b2284300bf..5ee59917b999f 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1465,6 +1465,7 @@ JL_DLLEXPORT jl_value_t *jl_switchto(jl_task_t *t, jl_value_t *arg); JL_DLLEXPORT void JL_NORETURN jl_throw(jl_value_t *e); JL_DLLEXPORT void JL_NORETURN jl_rethrow(void); JL_DLLEXPORT void JL_NORETURN jl_rethrow_other(jl_value_t *e); +JL_DLLEXPORT void JL_NORETURN jl_no_exc_handler(jl_value_t *e); #ifdef JULIA_ENABLE_THREADING static inline void jl_lock_frame_push(jl_mutex_t *lock) diff --git a/src/task.c b/src/task.c index b1cf229bd1248..105e80d34d579 100644 --- a/src/task.c +++ b/src/task.c @@ -188,6 +188,7 @@ static jl_function_t *task_done_hook_func=NULL; static void JL_NORETURN finish_task(jl_task_t *t, jl_value_t *resultval) { jl_ptls_t ptls = jl_get_ptls_states(); + JL_SIGATOMIC_BEGIN(); if (t->exception != jl_nothing) t->state = failed_sym; else @@ -211,7 +212,12 @@ static void JL_NORETURN 
finish_task(jl_task_t *t, jl_value_t *resultval) } if (task_done_hook_func != NULL) { jl_value_t *args[2] = {task_done_hook_func, (jl_value_t*)t}; - jl_apply(args, 2); + JL_TRY { + jl_apply(args, 2); + } + JL_CATCH { + jl_no_exc_handler(jl_exception_in_transit); + } } gc_debug_critical_error(); abort(); @@ -509,6 +515,14 @@ static void init_task(jl_task_t *t, char *stack) #endif /* !COPY_STACKS */ jl_timing_block_t *jl_pop_timing_block(jl_timing_block_t *cur_block); +JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e) +{ + jl_printf(JL_STDERR, "fatal: error thrown and no exception handler available.\n"); + jl_static_show(JL_STDERR, e); + jl_printf(JL_STDERR, "\n"); + jlbacktrace(); + jl_exit(1); +} // yield to exception handler void JL_NORETURN throw_internal(jl_value_t *e) @@ -532,11 +546,7 @@ void JL_NORETURN throw_internal(jl_value_t *e) jl_longjmp(eh->eh_ctx, 1); } else { - jl_printf(JL_STDERR, "fatal: error thrown and no exception handler available.\n"); - jl_static_show(JL_STDERR, e); - jl_printf(JL_STDERR, "\n"); - jlbacktrace(); - jl_exit(1); + jl_no_exc_handler(e); } assert(0); } diff --git a/ui/repl.c b/ui/repl.c index b0ae9c03c1ccd..826d23fb0dd7e 100644 --- a/ui/repl.c +++ b/ui/repl.c @@ -109,7 +109,12 @@ static NOINLINE int true_main(int argc, char *argv[]) (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("_start")) : NULL; if (start_client) { - jl_apply(&start_client, 1); + JL_TRY { + jl_apply(&start_client, 1); + } + JL_CATCH { + jl_no_exc_handler(jl_exception_in_transit); + } return 0; } From 03c3c7004bec19eadb5b5dee70183d63ca765eec Mon Sep 17 00:00:00 2001 From: Yichao Yu Date: Sat, 30 Jul 2016 23:22:35 +0800 Subject: [PATCH 2/2] Implement `jl_call_in_ctx` on unix. Use it to make sure that `jl_rethrow` and `jl_exit` are running on the right thread and right stack when an exception/exit is caused by a signal. 
Fix #17706 --- src/julia_internal.h | 3 - src/signals-mach.c | 86 ++++++++++++++----- src/signals-unix.c | 192 +++++++++++++++++++++++++++++++++++-------- src/threading.c | 8 -- 4 files changed, 226 insertions(+), 63 deletions(-) diff --git a/src/julia_internal.h b/src/julia_internal.h index 8346c247e5808..6eec5c076d43a 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -49,9 +49,6 @@ void jl_call_tracer(tracer_cb callback, jl_value_t *tracee); extern size_t jl_page_size; extern jl_function_t *jl_typeinf_func; -#if defined(JL_USE_INTEL_JITEVENTS) -extern unsigned sig_stack_size; -#endif JL_DLLEXPORT extern int jl_lineno; JL_DLLEXPORT extern const char *jl_filename; diff --git a/src/signals-mach.c b/src/signals-mach.c index b168f6cd70a11..cd7cb1b826557 100644 --- a/src/signals-mach.c +++ b/src/signals-mach.c @@ -15,7 +15,7 @@ #include #endif -static void attach_exception_port(thread_port_t thread); +static void attach_exception_port(thread_port_t thread, int segv_only); #ifdef JULIA_ENABLE_THREADING // low 16 bits are the thread id, the next 8 bits are the original gc_state @@ -99,7 +99,7 @@ static void allocate_segv_handler() } pthread_attr_destroy(&attr); for (int16_t tid = 0;tid < jl_n_threads;tid++) { - attach_exception_port(pthread_mach_thread_np(jl_all_tls_states[tid]->system_id)); + attach_exception_port(pthread_mach_thread_np(jl_all_tls_states[tid]->system_id), 0); } } @@ -120,7 +120,21 @@ enum x86_trap_flags { PAGE_PRESENT = 0x1 }; -void jl_throw_in_thread(int tid, mach_port_t thread, jl_value_t *exception) +static void jl_call_in_state(jl_ptls_t ptls2, x86_thread_state64_t *state, + void (*fptr)(void)) +{ + uint64_t rsp = (uint64_t)ptls2->signal_stack + sig_stack_size; + assert(rsp % 16 == 0); + + // push (null) $RIP onto the stack + rsp -= sizeof(void*); + *(void**)rsp = NULL; + + state->__rsp = rsp; // set stack pointer + state->__rip = (uint64_t)fptr; // "call" the function +} + +static void jl_throw_in_thread(int tid, mach_port_t thread, 
jl_value_t *exception) { unsigned int count = MACHINE_THREAD_STATE_COUNT; x86_thread_state64_t state; @@ -131,18 +145,9 @@ void jl_throw_in_thread(int tid, mach_port_t thread, jl_value_t *exception) ptls2->bt_size = rec_backtrace_ctx(ptls2->bt_data, JL_MAX_BT_SIZE, (bt_context_t*)&state); ptls2->exception_in_transit = exception; - - uint64_t rsp = (uint64_t)ptls2->signal_stack + sig_stack_size; - rsp &= -16; // ensure 16-byte alignment - - // push (null) $RIP onto the stack - rsp -= sizeof(void*); - *(void**)rsp = NULL; - - state.__rsp = rsp; // set stack pointer - state.__rip = (uint64_t)&jl_rethrow; // "call" the function - - ret = thread_set_state(thread, x86_THREAD_STATE64, (thread_state_t)&state, count); + jl_call_in_state(ptls2, &state, &jl_rethrow); + ret = thread_set_state(thread, x86_THREAD_STATE64, + (thread_state_t)&state, count); HANDLE_MACH_ERROR("thread_set_state",ret); } @@ -185,6 +190,11 @@ kern_return_t catch_exception_raise(mach_port_t exception_port, jl_ptls_t ptls2 = &jl_tls_states; tid = 0; #endif + if (exception == EXC_ARITHMETIC) { + jl_throw_in_thread(tid, thread, jl_diverror_exception); + return KERN_SUCCESS; + } + assert(exception == EXC_BAD_ACCESS); kern_return_t ret = thread_get_state(thread, x86_EXCEPTION_STATE64, (thread_state_t)&exc_state, &exc_count); HANDLE_MACH_ERROR("thread_get_state", ret); uint64_t fault_addr = exc_state.__faultvaddr; @@ -237,11 +247,14 @@ kern_return_t catch_exception_raise(mach_port_t exception_port, } } -static void attach_exception_port(thread_port_t thread) +static void attach_exception_port(thread_port_t thread, int segv_only) { kern_return_t ret; // http://www.opensource.apple.com/source/xnu/xnu-2782.1.97/osfmk/man/thread_set_exception_ports.html - ret = thread_set_exception_ports(thread, EXC_MASK_BAD_ACCESS, segv_port, EXCEPTION_DEFAULT, MACHINE_THREAD_STATE); + exception_mask_t mask = EXC_MASK_BAD_ACCESS; + if (!segv_only) + mask |= EXC_MASK_ARITHMETIC; + ret = thread_set_exception_ports(thread, mask, 
segv_port, EXCEPTION_DEFAULT, MACHINE_THREAD_STATE); HANDLE_MACH_ERROR("thread_set_exception_ports", ret); } @@ -283,7 +296,7 @@ static void jl_try_deliver_sigint(void) kern_return_t ret = thread_suspend(thread); HANDLE_MACH_ERROR("thread_suspend", ret); - // This aborts `sleep` and other syscall. + // This aborts `sleep` and other syscalls. ret = thread_abort(thread); HANDLE_MACH_ERROR("thread_abort", ret); @@ -304,6 +317,41 @@ static void jl_try_deliver_sigint(void) HANDLE_MACH_ERROR("thread_resume", ret); } +static void jl_exit_thread0(int exitstate) +{ + jl_ptls_t ptls2 = jl_all_tls_states[0]; + mach_port_t thread = pthread_mach_thread_np(ptls2->system_id); + kern_return_t ret = thread_suspend(thread); + HANDLE_MACH_ERROR("thread_suspend", ret); + + // This aborts `sleep` and other syscalls. + ret = thread_abort(thread); + HANDLE_MACH_ERROR("thread_abort", ret); + + unsigned int count = MACHINE_THREAD_STATE_COUNT; + x86_thread_state64_t state; + ret = thread_get_state(thread, x86_THREAD_STATE64, + (thread_state_t)&state, &count); + + void (*exit_func)(int) = &_exit; + if (thread0_exit_count <= 1) { + exit_func = &jl_exit; + } + else if (thread0_exit_count == 2) { + exit_func = &exit; + } + + // First integer argument. 
Not portable but good enough =) + state.__rdi = exitstate; + jl_call_in_state(ptls2, &state, (void (*)(void))exit_func); + ret = thread_set_state(thread, x86_THREAD_STATE64, + (thread_state_t)&state, count); + HANDLE_MACH_ERROR("thread_set_state",ret); + + ret = thread_resume(thread); + HANDLE_MACH_ERROR("thread_resume", ret); +} + static int profile_started = 0; mach_timespec_t timerprof; static pthread_t profiler_thread; @@ -363,7 +411,7 @@ void *mach_profile_listener(void *arg) (void)arg; int i; const int max_size = 512; - attach_exception_port(mach_thread_self()); + attach_exception_port(mach_thread_self(), 1); #ifdef LIBOSXUNWIND mach_profiler_thread = mach_thread_self(); #endif diff --git a/src/signals-unix.c b/src/signals-unix.c index 9557dd0a3bd60..9bf9bdb01f734 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -28,14 +28,10 @@ #define HAVE_TIMER #endif -#if defined(JL_USE_INTEL_JITEVENTS) -unsigned sig_stack_size = SIGSTKSZ; -#elif defined(_CPU_AARCH64_) -// The default SIGSTKSZ causes stack overflow in libunwind. -#define sig_stack_size (1 << 16) -#else -#define sig_stack_size SIGSTKSZ -#endif +// 8M signal stack, same as default stack size and enough +// for reasonable finalizers. 
+// Should also be enough for parallel GC when we have it =) +#define sig_stack_size (8 * 1024 * 1024) static bt_context_t *jl_to_bt_context(void *sigctx) { @@ -46,17 +42,88 @@ static bt_context_t *jl_to_bt_context(void *sigctx) #endif } -static void JL_NORETURN jl_throw_in_ctx(jl_value_t *e, void *sigctx) +static int thread0_exit_count = 0; + +static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void *_ctx) +{ +#if defined(_OS_LINUX_) && defined(_CPU_X86_64_) + const ucontext_t *ctx = (const ucontext_t*)_ctx; + return ctx->uc_mcontext.gregs[REG_RSP]; +#elif defined(_OS_LINUX_) && defined(_CPU_X86_) + const ucontext_t *ctx = (const ucontext_t*)_ctx; + return ctx->uc_mcontext.gregs[REG_ESP]; +#elif defined(_OS_LINUX_) && defined(_CPU_AARCH64_) + const ucontext_t *ctx = (const ucontext_t*)_ctx; + return ctx->uc_mcontext.sp; +#elif defined(_OS_LINUX_) && defined(_CPU_ARM_) + const ucontext_t *ctx = (const ucontext_t*)_ctx; + return ctx->uc_mcontext.arm_sp; +#elif defined(_OS_DARWIN_) + const ucontext64_t *ctx = (const ucontext64_t*)_ctx; + return ctx->uc_mcontext64->__ss.__rsp; +#else + // TODO Add support for FreeBSD and PowerPC(64)? + return 0; +#endif +} + +static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), void *_ctx) +{ + // Modifying the ucontext should work but there is concern that + // sigreturn-oriented programming mitigations can work against us + // by rejecting a ucontext that has been modified. + // The current (staged) implementation in the Linux Kernel only + // checks that the syscall is made in the signal handler and that + // the ucontext address is valid. Hopefully the value of the ucontext + // will not be part of the validation... 
+ uintptr_t rsp = (uintptr_t)ptls->signal_stack + sig_stack_size; + assert(rsp % 16 == 0); +#if defined(_OS_LINUX_) && defined(_CPU_X86_64_) + ucontext_t *ctx = (ucontext_t*)_ctx; + rsp -= sizeof(void*); + *(void**)rsp = NULL; + ctx->uc_mcontext.gregs[REG_RSP] = rsp; + ctx->uc_mcontext.gregs[REG_RIP] = (uintptr_t)fptr; +#elif defined(_OS_LINUX_) && defined(_CPU_X86_) + ucontext_t *ctx = (ucontext_t*)_ctx; + rsp -= sizeof(void*); + *(void**)rsp = NULL; + ctx->uc_mcontext.gregs[REG_ESP] = rsp; + ctx->uc_mcontext.gregs[REG_EIP] = (uintptr_t)fptr; +#elif defined(_OS_LINUX_) && defined(_CPU_AARCH64_) + ucontext_t *ctx = (ucontext_t*)_ctx; + ctx->uc_mcontext.sp = rsp; + ctx->uc_mcontext.regs[29] = 0; // Clear frame pointer (x29) + ctx->uc_mcontext.pc = (uintptr_t)fptr; +#elif defined(_OS_LINUX_) && defined(_CPU_ARM_) + ucontext_t *ctx = (ucontext_t*)_ctx; + ctx->uc_mcontext.arm_sp = rsp; + ctx->uc_mcontext.arm_lr = 0; // Clear link register + ctx->uc_mcontext.arm_pc = (uintptr_t)fptr; +#elif defined(_OS_DARWIN_) + // Only used for SIGFPE. + // This doesn't seem to be reliable when the SIGFPE is generated + // from a divide-by-zero exception, which is now handled by + // `catch_exception_raise`. It works fine when a signal is received + // due to `kill`/`raise` though. + ucontext64_t *ctx = (ucontext64_t*)_ctx; + rsp -= sizeof(void*); + *(void**)rsp = NULL; + ctx->uc_mcontext64->__ss.__rsp = rsp; + ctx->uc_mcontext64->__ss.__rip = (uintptr_t)fptr; +#else + // TODO Add support for FreeBSD and PowerPC(64)? + fptr(); +#endif +} + +static void jl_throw_in_ctx(jl_ptls_t ptls, jl_value_t *e, void *sigctx) { - jl_ptls_t ptls = jl_get_ptls_states(); if (!ptls->safe_restore) ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, jl_to_bt_context(sigctx)); ptls->exception_in_transit = e; - // TODO throw the error by modifying sigctx for supported platforms - // This will avoid running the atexit handler on the signal stack - // if no excepiton handler is registered. 
- jl_rethrow(); + jl_call_in_ctx(ptls, &jl_rethrow, sigctx); } static pthread_t signals_thread; @@ -104,6 +171,19 @@ static void jl_unblock_signal(int sig) #include #else +static int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr) +{ + // One guard page for signal_stack. + return !((char*)ptr < (char*)ptls->signal_stack - jl_page_size || + (char*)ptr > (char*)ptls->signal_stack + sig_stack_size); +} + +static int jl_is_on_sigstack(jl_ptls_t ptls, void *ptr, void *context) +{ + return (is_addr_on_sigstack(ptls, ptr) && + is_addr_on_sigstack(ptls, (void*)jl_get_rsp_from_ctx(context))); +} + static void segv_handler(int sig, siginfo_t *info, void *context) { jl_ptls_t ptls = jl_get_ptls_states(); @@ -117,27 +197,36 @@ static void segv_handler(int sig, siginfo_t *info, void *context) if (ptls->tid != 0) return; #endif - if (jl_get_ptls_states()->defer_signal) { + if (ptls->defer_signal) { jl_safepoint_defer_sigint(); } else if (jl_safepoint_consume_sigint()) { jl_clear_force_sigint(); - jl_throw_in_ctx(jl_interrupt_exception, context); + jl_throw_in_ctx(ptls, jl_interrupt_exception, context); } return; } - if (ptls->safe_restore || is_addr_on_stack(jl_get_ptls_states(), info->si_addr)) { // stack overflow, or restarting jl_ + if (ptls->safe_restore || is_addr_on_stack(ptls, info->si_addr)) { // stack overflow, or restarting jl_ jl_unblock_signal(sig); - jl_throw_in_ctx(jl_stackovf_exception, context); + jl_throw_in_ctx(ptls, jl_stackovf_exception, context); + } + else if (jl_is_on_sigstack(ptls, info->si_addr, context)) { + // This mainly happens when one of the finalizers during final cleanup + // on the signal stack has a deep/infinite recursion. + // There isn't anything more we can do + // (we are already corrupting that stack running this function) + // so just call `_exit` to terminate immediately. 
+ jl_safe_printf("ERROR: Signal stack overflow, exit\n"); + _exit(sig + 128); } else if (sig == SIGSEGV && info->si_code == SEGV_ACCERR) { // writing to read-only memory (e.g., mmap) jl_unblock_signal(sig); - jl_throw_in_ctx(jl_readonlymemory_exception, context); + jl_throw_in_ctx(ptls, jl_readonlymemory_exception, context); } else { #ifdef SEGV_EXCEPTION jl_unblock_signal(sig); - jl_throw_in_ctx(jl_segv_exception, context); + jl_throw_in_ctx(ptls, jl_segv_exception, context); #else sigdie_handler(sig, info, context); #endif @@ -199,11 +288,39 @@ static void jl_try_deliver_sigint(void) pthread_kill(ptls2->system_id, SIGUSR2); } +// Write only by signal handling thread, read only by main thread +// no sync necessary. +static int thread0_exit_state = 0; +static void jl_exit_thread0_cb(void) +{ + // This can get stuck if it happens at an unfortunate spot + // (unavoidable due to its async nature). + // Try harder to exit each time if we get multiple exit requests. + if (thread0_exit_count <= 1) { + jl_exit(thread0_exit_state); + } + else if (thread0_exit_count == 2) { + exit(thread0_exit_state); + } + else { + _exit(thread0_exit_state); + } +} + +static void jl_exit_thread0(int state) +{ + jl_ptls_t ptls2 = jl_all_tls_states[0]; + thread0_exit_state = state; + jl_atomic_store_release(&ptls2->signal_request, 3); + pthread_kill(ptls2->system_id, SIGUSR2); +} + // request: // 0: nothing // 1: get state -// 3: throw sigint if `!defer_signal && io_wait` or if force throw threshold +// 2: throw sigint if `!defer_signal && io_wait` or if force throw threshold // is reached +// 3: exit with `thread0_exit_state` void usr2_handler(int sig, siginfo_t *info, void *ctx) { jl_ptls_t ptls = jl_get_ptls_states(); @@ -229,9 +346,13 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx) jl_safe_printf("WARNING: Force throwing a SIGINT\n"); // Force a throw jl_clear_force_sigint(); - jl_throw_in_ctx(jl_interrupt_exception, ctx); + jl_throw_in_ctx(ptls, jl_interrupt_exception, ctx); } 
} + else if (request == 3) { + jl_unblock_signal(sig); + jl_call_in_ctx(ptls, jl_exit_thread0_cb, ctx); + } } #if defined(HAVE_TIMER) @@ -407,6 +528,15 @@ static void *signal_listener(void *arg) critical |= (sig == SIGUSR1 && !profile); #endif + int doexit = critical; +#ifdef SIGINFO + if (sig == SIGINFO) + doexit = 0; +#else + if (sig == SIGUSR1) + doexit = 0; +#endif + bt_size = 0; // sample each thread, round-robin style in reverse order // (so that thread zero gets notified last) @@ -446,18 +576,13 @@ static void *signal_listener(void *arg) // and must be thread-safe, but not necessarily signal-handler safe if (critical) { jl_critical_error(sig, NULL, bt_data, &bt_size); - // FIXME - // It is unsafe to run the exit handler on this thread - // (this thread is not managed and has a rather limited stack space) - // try harder to run this on a managed thread. -#ifdef SIGINFO - if (sig != SIGINFO) -#else - if (sig != SIGUSR1) -#endif - jl_exit(128 + sig); + if (doexit) { + thread0_exit_count++; + jl_exit_thread0(128 + sig); + } } } + return NULL; } void restore_signals(void) @@ -482,8 +607,9 @@ void restore_signals(void) void fpe_handler(int sig, siginfo_t *info, void *context) { (void)info; + jl_ptls_t ptls = jl_get_ptls_states(); jl_unblock_signal(sig); - jl_throw_in_ctx(jl_diverror_exception, context); + jl_throw_in_ctx(ptls, jl_diverror_exception, context); } void jl_install_default_signal_handlers(void) diff --git a/src/threading.c b/src/threading.c index 6b1d373c7f1f6..664a28df8880a 100644 --- a/src/threading.c +++ b/src/threading.c @@ -777,14 +777,6 @@ void jl_init_threading(void) static jl_ptls_t _jl_all_tls_states; jl_all_tls_states = &_jl_all_tls_states; jl_n_threads = 1; - -#if defined(__linux__) && defined(JL_USE_INTEL_JITEVENTS) - if (jl_using_intel_jitevents) - // Intel VTune Amplifier needs at least 64k for alternate stack. - if (SIGSTKSZ < 1<<16) - sig_stack_size = 1<<16; -#endif - ti_init_master_thread(); }