From c207ad8ee73bbd41940f4bd15c958a4f41047859 Mon Sep 17 00:00:00 2001 From: Ivan Maidanski Date: Wed, 20 Apr 2022 08:31:43 +0300 Subject: [PATCH] Fix hang in select() called from suspend signal handler if TSan Issue #236 (bdwgc). Previously select() was used to sleep in the suspend signal handler while the thread is manually suspended. This is changed to use sigsuspend() instead. (But select() is still used for a reason when the thread is self-suspended.) * pthread_stop_world.c [!GC_OPENBSD_UTHREADS && !NACL] (GC_stop_count): Update comment. * pthread_stop_world.c [!GC_OPENBSD_UTHREADS && !NACL && GC_ENABLE_SUSPEND_THREAD] (GC_suspend_handler_inner): Remove calls of GC_store_stack_ptr(), sem_post(), GC_suspend_self_inner(), RESTORE_CANCEL() (dedicated to manual thread suspend). * pthread_stop_world.c [!GC_OPENBSD_UTHREADS && !NACL && GC_ENABLE_SUSPEND_THREAD && E2K] (GC_suspend_handler_inner): Remove backing_store_end and backing_store_ptr set and clear dedicated to manual thread suspend. * pthread_stop_world.c [!GC_OPENBSD_UTHREADS && !NACL && GC_ENABLE_SUSPEND_THREAD] (GC_suspend_handler_inner): Repeat sigsuspend() while suspend_cnt&1 and me->stop_info.ext_suspend_cnt is not updated. * pthread_stop_world.c [!GC_OPENBSD_UTHREADS && !NACL && GC_ENABLE_SUSPEND_THREAD] (GC_suspend_self_inner): Add DISABLE_CANCEL() and RESTORE_CANCEL(); refine TODO item. * pthread_stop_world.c [!GC_OPENBSD_UTHREADS && !NACL && GC_ENABLE_SUSPEND_THREAD && DEBUG_THREADS] (GC_suspend_self_inner): Log "suspend self" and "resume self" events. * pthread_stop_world.c [!GC_OPENBSD_UTHREADS && !NACL && GC_ENABLE_SUSPEND_THREAD] (GC_suspend_thread): Rename saved_stop_count local variable to next_stop_count; add assertion that self thread is not suspended; replace sem_wait() in a loop to suspend_restart_barrier(1); increment GC_stop_count on exit of the function as well (instead of restore). * pthread_stop_world.c [!GC_OPENBSD_UTHREADS && !NACL && GC_ENABLE_SUSPEND_THREAD] (GC_resume_thread): Add assertion that GC_stop_count is odd; call raise_signal(GC_sig_thr_restart) and suspend_restart_barrier(1). --- pthread_stop_world.c | 104 ++++++++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 46 deletions(-) diff --git a/pthread_stop_world.c b/pthread_stop_world.c index d8aee014e..ad1be33fb 100644 --- a/pthread_stop_world.c +++ b/pthread_stop_world.c @@ -134,14 +134,15 @@ static sigset_t suspend_handler_mask; STATIC volatile AO_t GC_stop_count; /* Incremented (to the nearest even value) at */ - /* the beginning of GC_stop_world() and once */ - /* more (to an odd value) at the beginning of */ - /* GC_start_world(). The lowest bit is */ - /* THREAD_RESTARTED one which, if set, means */ - /* it is safe for threads to restart, */ - /* i.e. they will see another suspend signal */ - /* before they are expected to stop (unless */ - /* they have stopped voluntarily). */ + /* the beginning of GC_stop_world() (or when */ + /* a thread is requested to be suspended by */ + /* GC_suspend_thread) and once more (to an odd */ + /* value) at the beginning of GC_start_world(). */ + /* The lowest bit is THREAD_RESTARTED one */ + /* which, if set, means it is safe for threads */ + /* to restart, i.e. they will see another */ + /* suspend signal before they are expected to */ + /* stop (unless they have stopped voluntarily). */ STATIC GC_bool GC_retry_signals = FALSE; @@ -355,32 +356,6 @@ STATIC void GC_suspend_handler_inner(ptr_t dummy GC_ATTR_UNUSED, GC_log_printf("Suspending %p\n", (void *)self); # endif me = GC_lookup_thread_async(self); - -# ifdef GC_ENABLE_SUSPEND_THREAD - suspend_cnt = (word)ao_load_async(&(me -> stop_info.ext_suspend_cnt)); - if ((suspend_cnt & 1) != 0) { - GC_store_stack_ptr(me); -# ifdef E2K - GC_ASSERT(NULL == me -> backing_store_end); - GET_PROCEDURE_STACK_LOCAL(&bs_lo, &stack_size); - me -> backing_store_end = bs_lo; - me -> backing_store_ptr = bs_lo + stack_size; -# endif - sem_post(&GC_suspend_ack_sem); - GC_suspend_self_inner(me, suspend_cnt); -# ifdef DEBUG_THREADS - GC_log_printf("Continuing %p on GC_resume_thread\n", (void *)self); -# endif -# ifdef E2K - FREE_PROCEDURE_STACK_LOCAL(bs_lo, stack_size); - me -> backing_store_ptr = NULL; - me -> backing_store_end = NULL; -# endif - RESTORE_CANCEL(cancel_state); - return; - } -# endif - if ((me->stop_info.last_stop_count & ~(word)THREAD_RESTARTED) == my_stop_count) { /* Duplicate signal. OK if we are retrying. */ @@ -397,6 +372,9 @@ STATIC void GC_suspend_handler_inner(ptr_t dummy GC_ATTR_UNUSED, me -> backing_store_end = bs_lo; me -> backing_store_ptr = bs_lo + stack_size; # endif +# ifdef GC_ENABLE_SUSPEND_THREAD + suspend_cnt = (word)ao_load_async(&(me -> stop_info.ext_suspend_cnt)); +# endif /* Tell the thread that wants to stop the world that this */ /* thread has been stopped. Note that sem_post() is */ @@ -416,8 +394,14 @@ STATIC void GC_suspend_handler_inner(ptr_t dummy GC_ATTR_UNUSED, /* this code should not be executed. */ do { sigsuspend(&suspend_handler_mask); - } while (ao_load_acquire_async(&GC_stop_count) == my_stop_count); - /* iterate while not restarting the world */ + /* Iterate while not restarting the world or thread is suspended. */ + } while (ao_load_acquire_async(&GC_stop_count) == my_stop_count +# ifdef GC_ENABLE_SUSPEND_THREAD + || ((suspend_cnt & 1) != 0 + && (word)ao_load_acquire_async( + &(me -> stop_info.ext_suspend_cnt)) == suspend_cnt) +# endif + ); # ifdef DEBUG_THREADS GC_log_printf("Continuing %p\n", (void *)self); @@ -629,17 +613,27 @@ STATIC void GC_restart_handler(int sig) } GC_INNER void GC_suspend_self_inner(GC_thread me, word suspend_cnt) { + IF_CANCEL(int cancel_state;) + GC_ASSERT((suspend_cnt & 1) != 0); + DISABLE_CANCEL(cancel_state); +# ifdef DEBUG_THREADS + GC_log_printf("Suspend self: %p\n", (void *)(me -> id)); +# endif while ((word)ao_load_acquire_async(&(me -> stop_info.ext_suspend_cnt)) == suspend_cnt) { - /* TODO: Use sigsuspend() instead. */ + /* TODO: Use sigsuspend() even for self-suspended threads. */ GC_brief_async_signal_safe_sleep(); } +# ifdef DEBUG_THREADS + GC_log_printf("Resume self: %p\n", (void *)(me -> id)); +# endif + RESTORE_CANCEL(cancel_state); } GC_API void GC_CALL GC_suspend_thread(GC_SUSPEND_THREAD_ID thread) { GC_thread t; - AO_t saved_stop_count; + AO_t next_stop_count; word suspend_cnt; IF_CANCEL(int cancel_state;) DCL_LOCK_STATE; @@ -652,6 +646,7 @@ STATIC void GC_restart_handler(int sig) } suspend_cnt = (word)(t -> stop_info.ext_suspend_cnt); if ((suspend_cnt & 1) != 0) /* already suspended? */ { + GC_ASSERT(!THREAD_EQUAL((pthread_t)thread, pthread_self())); UNLOCK(); return; } @@ -691,9 +686,9 @@ STATIC void GC_restart_handler(int sig) /* be trying to acquire this lock too, and the suspend handler */ /* execution is deferred until the write fault handler completes. */ - saved_stop_count = GC_stop_count; - GC_ASSERT((saved_stop_count & THREAD_RESTARTED) != 0); - AO_store(&GC_stop_count, saved_stop_count + THREAD_RESTARTED); + next_stop_count = GC_stop_count + THREAD_RESTARTED; + GC_ASSERT((next_stop_count & THREAD_RESTARTED) == 0); + AO_store(&GC_stop_count, next_stop_count); /* Set the flag making the change visible to the signal handler. */ AO_store_release(&(t -> stop_info.ext_suspend_cnt), @@ -711,13 +706,10 @@ STATIC void GC_restart_handler(int sig) /* Wait for the thread to complete threads table lookup and */ /* stack_ptr assignment. */ GC_ASSERT(GC_thr_initialized); - while (sem_wait(&GC_suspend_ack_sem) != 0) { - if (errno != EINTR) - ABORT("sem_wait for handler failed (suspend_self)"); - } + suspend_restart_barrier(1); if (GC_manual_vdb) GC_release_dirty_lock(); - AO_store(&GC_stop_count, saved_stop_count); /* restore the counter */ + AO_store(&GC_stop_count, next_stop_count | THREAD_RESTARTED); RESTORE_CANCEL(cancel_state); UNLOCK(); @@ -733,8 +725,28 @@ STATIC void GC_restart_handler(int sig) word suspend_cnt = (word)(t -> stop_info.ext_suspend_cnt); if ((suspend_cnt & 1) != 0) /* is suspended? */ { + GC_ASSERT((GC_stop_count & THREAD_RESTARTED) != 0); /* Mark the thread as not suspended - it will be resumed shortly. */ AO_store(&(t -> stop_info.ext_suspend_cnt), (AO_t)(suspend_cnt + 1)); + + if ((t -> flags & (FINISHED | DO_BLOCKING)) == 0) { + int result = raise_signal(t, GC_sig_thr_restart); + + /* TODO: Support signal resending on GC_retry_signals */ + if (result != 0) + ABORT_ARG1("pthread_kill failed in GC_resume_thread", + ": errcode= %d", result); +# ifndef GC_NETBSD_THREADS_WORKAROUND + if (GC_retry_signals || GC_sig_suspend == GC_sig_thr_restart) +# endif + { + IF_CANCEL(int cancel_state;) + + DISABLE_CANCEL(cancel_state); + suspend_restart_barrier(1); + RESTORE_CANCEL(cancel_state); + } + } } } UNLOCK();