@@ -24,6 +24,8 @@ int gc_first_tid;
 // Mutex/cond used to synchronize sleep/wakeup of GC threads
 uv_mutex_t gc_threads_lock;
 uv_cond_t gc_threads_cond;
+// Mutex used to coordinate entry of GC threads in the mark loop
+uv_mutex_t gc_queue_observer_lock;
 
 // Linked list of callback functions
 
@@ -2857,8 +2859,10 @@ void gc_mark_and_steal(jl_ptls_t ptls)
     jl_gc_markqueue_t *mq = &ptls->mark_queue;
     jl_gc_markqueue_t *mq_master = NULL;
     int master_tid = jl_atomic_load(&gc_master_tid);
-    if (master_tid != -1)
-        mq_master = &gc_all_tls_states[master_tid]->mark_queue;
+    if (master_tid == -1) {
+        return;
+    }
+    mq_master = &gc_all_tls_states[master_tid]->mark_queue;
     void *new_obj;
     jl_gc_chunk_t c;
 pop: {
@@ -2933,28 +2937,108 @@ void gc_mark_and_steal(jl_ptls_t ptls)
     }
 }
 
+size_t gc_count_work_in_queue(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    // assume each chunk is worth 256 units of work and each pointer
+    // is worth 1 unit of work
+    size_t work = 256 * (jl_atomic_load_relaxed(&ptls->mark_queue.chunk_queue.bottom) -
+                         jl_atomic_load_relaxed(&ptls->mark_queue.chunk_queue.top));
+    work += (jl_atomic_load_relaxed(&ptls->mark_queue.ptr_queue.bottom) -
+             jl_atomic_load_relaxed(&ptls->mark_queue.ptr_queue.top));
+    return work;
+}
+
+/**
+ * Correctness argument for the mark-loop termination protocol.
+ *
+ * Safety properties:
+ * - No work items shall be in any thread's queues when `gc_mark_loop_barrier` observes
+ * that `gc_n_threads_marking` is zero.
+ *
+ * - No work item shall be stolen from the master thread (i.e. mutator thread which started
+ * GC and which helped the `jl_n_gcthreads` - 1 threads to mark) after
+ * `gc_mark_loop_barrier` observes that `gc_n_threads_marking` is zero. This property is
+ * necessary because we call `gc_mark_loop_serial` after marking the finalizer list in
+ * `_jl_gc_collect`, and want to ensure that we have the serial mark-loop semantics there,
+ * and that no work is stolen from us at that point.
+ *
+ * Proof:
+ * - Suppose the master thread observes that `gc_n_threads_marking` is zero in
+ * `gc_mark_loop_barrier` and there is a work item left in one thread's queue at that point.
+ * Since threads try to steal from all threads' queues, this implies that all threads must
+ * have tried to steal from the queue which still has a work item left, but failed to do so,
+ * which violates the semantics of Chase-Lev's work-stealing queue.
+ *
+ * - Let E1 be the event "master thread writes -1 to gc_master_tid" and E2 be the event
+ * "master thread observes that `gc_n_threads_marking` is zero". Since we're using
+ * sequentially consistent atomics, E1 => E2. Now suppose one thread which is spinning in
+ * `gc_should_mark` tries to enter the mark-loop after E2. In order to do so, it must
+ * increment `gc_n_threads_marking` to 1 in an event E3, and then read `gc_master_tid` in an
+ * event E4. Since we're using sequentially consistent atomics, E3 => E4. Since we observed
+ * `gc_n_threads_marking` as zero in E2, then E2 => E3, and we conclude E1 => E4, so that
+ * the thread which is spinning in `gc_should_mark` must observe that `gc_master_tid` is -1
+ * and therefore won't enter the mark-loop.
+ */
+
+int gc_should_mark(jl_ptls_t ptls)
+{
+    int should_mark = 0;
+    int n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
+    // fast path
+    if (n_threads_marking == 0) {
+        return 0;
+    }
+    uv_mutex_lock(&gc_queue_observer_lock);
+    while (1) {
+        int tid = jl_atomic_load(&gc_master_tid);
+        // fast path
+        if (tid == -1) {
+            break;
+        }
+        n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
+        // fast path
+        if (n_threads_marking == 0) {
+            break;
+        }
+        size_t work = gc_count_work_in_queue(gc_all_tls_states[tid]);
+        for (tid = gc_first_tid; tid < gc_first_tid + jl_n_gcthreads; tid++) {
+            work += gc_count_work_in_queue(gc_all_tls_states[tid]);
+        }
+        // if there is a lot of work left, enter the mark loop
+        if (work >= 16 * n_threads_marking) {
+            jl_atomic_fetch_add(&gc_n_threads_marking, 1);
+            should_mark = 1;
+            break;
+        }
+        jl_cpu_pause();
+    }
+    uv_mutex_unlock(&gc_queue_observer_lock);
+    return should_mark;
+}
+
+void gc_wake_all_for_marking(jl_ptls_t ptls)
+{
+    jl_atomic_store(&gc_master_tid, ptls->tid);
+    uv_mutex_lock(&gc_threads_lock);
+    jl_atomic_fetch_add(&gc_n_threads_marking, 1);
+    uv_cond_broadcast(&gc_threads_cond);
+    uv_mutex_unlock(&gc_threads_lock);
+}
+
 void gc_mark_loop_parallel(jl_ptls_t ptls, int master)
 {
-    int backoff = GC_BACKOFF_MIN;
     if (master) {
-        jl_atomic_store(&gc_master_tid, ptls->tid);
-        // Wake threads up and try to do some work
-        uv_mutex_lock(&gc_threads_lock);
-        jl_atomic_fetch_add(&gc_n_threads_marking, 1);
-        uv_cond_broadcast(&gc_threads_cond);
-        uv_mutex_unlock(&gc_threads_lock);
+        gc_wake_all_for_marking(ptls);
         gc_mark_and_steal(ptls);
         jl_atomic_fetch_add(&gc_n_threads_marking, -1);
     }
-    while (jl_atomic_load(&gc_n_threads_marking) > 0) {
-        // Try to become a thief while other threads are marking
-        jl_atomic_fetch_add(&gc_n_threads_marking, 1);
-        if (jl_atomic_load(&gc_master_tid) != -1) {
-            gc_mark_and_steal(ptls);
+    while (1) {
+        int should_mark = gc_should_mark(ptls);
+        if (!should_mark) {
+            break;
        }
+        gc_mark_and_steal(ptls);
         jl_atomic_fetch_add(&gc_n_threads_marking, -1);
-        // Failed to steal
-        gc_backoff(&backoff);
    }
 }
 
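
Note (not part of the diff): the correctness comment above argues in terms of four events E1..E4 under sequentially consistent atomics. The standalone sketch below models only that ordering, using C11 stdatomic in place of Julia's jl_atomic_* wrappers; `gc_mark_loop_barrier` is not shown in this hunk, so the barrier body here is an assumption and the names are placeholders, not the actual implementation.

#include <stdatomic.h>
#include <stdbool.h>

_Atomic int master_tid = -1;        // stand-in for gc_master_tid
_Atomic int n_threads_marking = 0;  // stand-in for gc_n_threads_marking

// Worker side: advertise yourself (E3) before reading master_tid (E4).
// If the master has already retired (E1 wrote -1), back out without marking.
bool try_join_mark_loop(void)
{
    atomic_fetch_add(&n_threads_marking, 1);      // E3
    if (atomic_load(&master_tid) == -1) {         // E4
        atomic_fetch_sub(&n_threads_marking, 1);
        return false;
    }
    return true;  // caller marks, then decrements n_threads_marking when done
}

// Master side: retire (E1), then wait until nobody is marking (E2).
// Under seq_cst, any increment (E3) not observed by E2 is ordered after E2,
// so the matching read (E4) sees -1 and the worker backs out; hence no work
// can be stolen from the master once this returns.
void mark_loop_barrier(void)
{
    atomic_store(&master_tid, -1);                // E1
    while (atomic_load(&n_threads_marking) != 0)  // E2 once this loop exits
        ;  // a real implementation would pause or sleep here
}

The point of the sketch is only the ordering: the worker's increment always precedes its read of the master tid, which is what lets the proof conclude E1 => E4.
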
@@ -3728,6 +3812,7 @@ void jl_gc_init(void)
     uv_mutex_init(&gc_perm_lock);
     uv_mutex_init(&gc_threads_lock);
     uv_cond_init(&gc_threads_cond);
+    uv_mutex_init(&gc_queue_observer_lock);
 
     jl_gc_init_page();
     jl_gc_debug_init();
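
Note (not part of the diff): for quick reference, `gc_count_work_in_queue` weighs a queued chunk at 256 work units and a queued pointer at 1 unit, and `gc_should_mark` lets a spinning thread join once the total across all queues reaches 16 units per thread already marking. The snippet below merely restates that arithmetic with illustrative numbers; the helper names are made up for this note.

#include <assert.h>
#include <stddef.h>

// Same weights as gc_count_work_in_queue: 256 per queued chunk, 1 per pointer.
size_t estimate_work(size_t chunks, size_t pointers)
{
    return 256 * chunks + pointers;
}

// Same condition as gc_should_mark: join once work >= 16 * n_threads_marking.
int should_enter_mark_loop(size_t chunks, size_t pointers, int n_threads_marking)
{
    return estimate_work(chunks, pointers) >= (size_t)(16 * n_threads_marking);
}

int main(void)
{
    // With 3 threads already marking, the threshold is 16 * 3 = 48 units.
    assert(!should_enter_mark_loop(0, 47, 3));  // 47 pending pointers: keep spinning
    assert(should_enter_mark_loop(0, 48, 3));   // 48 pending pointers: join
    assert(should_enter_mark_loop(1, 0, 3));    // a single chunk (256 units) is enough
    return 0;
}
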