base/timing.jl (1 change: 0 additions & 1 deletion)
@@ -29,7 +29,6 @@ struct GC_Num
end

gc_num() = ccall(:jl_gc_num, GC_Num, ())
reset_gc_stats() = ccall(:jl_gc_reset_stats, Cvoid, ())

# This type is to represent differences in the counters, so fields may be negative
struct GC_Diff
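With `reset_gc_stats` gone here and its C counterpart `jl_gc_reset_stats` deleted from `src/gc.c` further down, the remaining way to get per-interval numbers is to snapshot the monotonic counters and subtract, which is exactly what `GC_Diff` exists for. A minimal C sketch of that snapshot-and-diff pattern (the struct and workload are illustrative stand-ins, not the real `GC_Num` layout):

```c
#include <stdint.h>
#include <stdio.h>

typedef struct { int64_t allocd; int64_t total_time; } gc_counters_t; // stand-in

static gc_counters_t counters; // grows monotonically; nothing ever resets it

static void workload(void) { counters.allocd += 4096; counters.total_time += 100; }

int main(void) {
    gc_counters_t before = counters;  // snapshot instead of reset
    workload();
    gc_counters_t after = counters;
    // Per-interval stats are differences of snapshots; as the GC_Diff
    // comment above notes, some difference fields may legitimately be negative.
    printf("allocd delta = %lld\n", (long long)(after.allocd - before.allocd));
    return 0;
}
```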
src/gc-debug.c (92 changes: 0 additions & 92 deletions)
@@ -603,91 +603,6 @@ void jl_gc_debug_print_status(void)
}
#endif

#ifdef OBJPROFILE
static htable_t obj_counts[3];
static htable_t obj_sizes[3];
void objprofile_count(void *ty, int old, int sz)
{
if (gc_verifying) return;
if ((intptr_t)ty <= 0x10) {
ty = (void*)jl_buff_tag;
}
else if (ty != (void*)jl_buff_tag && ty != jl_malloc_tag &&
jl_typeof(ty) == (jl_value_t*)jl_datatype_type &&
((jl_datatype_t*)ty)->instance) {
ty = jl_singleton_tag;
}
void **bp = ptrhash_bp(&obj_counts[old], ty);
if (*bp == HT_NOTFOUND)
*bp = (void*)2;
else
(*((intptr_t*)bp))++;
bp = ptrhash_bp(&obj_sizes[old], ty);
if (*bp == HT_NOTFOUND)
*bp = (void*)(intptr_t)(1 + sz);
else
*((intptr_t*)bp) += sz;
}

void objprofile_reset(void)
{
for (int g = 0; g < 3; g++) {
htable_reset(&obj_counts[g], 0);
htable_reset(&obj_sizes[g], 0);
}
}

static void objprofile_print(htable_t nums, htable_t sizes)
{
for(int i=0; i < nums.size; i+=2) {
if (nums.table[i+1] != HT_NOTFOUND) {
void *ty = nums.table[i];
int num = (intptr_t)nums.table[i + 1] - 1;
size_t sz = (uintptr_t)ptrhash_get(&sizes, ty) - 1;
static const int ptr_hex_width = 2 * sizeof(void*);
if (sz > 2e9) {
jl_safe_printf(" %6d : %*.1f GB of (%*p) ",
num, 6, ((double)sz) / 1024 / 1024 / 1024,
ptr_hex_width, ty);
}
else if (sz > 2e6) {
jl_safe_printf(" %6d : %*.1f MB of (%*p) ",
num, 6, ((double)sz) / 1024 / 1024,
ptr_hex_width, ty);
}
else if (sz > 2e3) {
jl_safe_printf(" %6d : %*.1f kB of (%*p) ",
num, 6, ((double)sz) / 1024,
ptr_hex_width, ty);
}
else {
jl_safe_printf(" %6d : %*d B of (%*p) ",
num, 6, (int)sz, ptr_hex_width, ty);
}
if (ty == (void*)jl_buff_tag)
jl_safe_printf("#<buffer>");
else if (ty == jl_malloc_tag)
jl_safe_printf("#<malloc>");
else if (ty == jl_singleton_tag)
jl_safe_printf("#<singletons>");
else
jl_static_show(JL_STDERR, (jl_value_t*)ty);
jl_safe_printf("\n");
}
}
}

void objprofile_printall(void)
{
jl_safe_printf("Transient mark :\n");
objprofile_print(obj_counts[0], obj_sizes[0]);
jl_safe_printf("Perm mark :\n");
objprofile_print(obj_counts[1], obj_sizes[1]);
jl_safe_printf("Remset :\n");
objprofile_print(obj_counts[2], obj_sizes[2]);
}
#endif
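A detail of the removed profiler worth recording: counts and sizes were stored offset by one (a first sighting stores `(void*)2`; sizes store `1 + sz`) so that a live entry could never equal the hash table's not-found sentinel, and `objprofile_print` subtracted the offset back out. A self-contained sketch of the same trick with a toy table (no collision handling, illustrative hash):

```c
#include <stdint.h>
#include <stdio.h>

#define TABLE_SZ 64
static intptr_t vals[TABLE_SZ]; // 0 plays the role of HT_NOTFOUND here

static void count_key(const void *key) {
    size_t i = ((uintptr_t)key >> 4) % TABLE_SZ; // toy hash
    // First sighting stores count+1 == 2, so "present with count 1" can never
    // collide with the absent sentinel; later sightings just increment.
    vals[i] = (vals[i] == 0) ? 2 : vals[i] + 1;
}

int main(void) {
    int dummy;
    count_key(&dummy);
    count_key(&dummy);
    size_t i = ((uintptr_t)&dummy >> 4) % TABLE_SZ;
    printf("count = %ld\n", (long)(vals[i] - 1)); // subtract the offset: prints 2
    return 0;
}
```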

#if defined(GC_TIME) || defined(GC_FINAL_STATS)
STATIC_INLINE double jl_ns2ms(int64_t t)
{
@@ -996,13 +911,6 @@ void jl_gc_debug_init(void)
arraylist_new(&lostval_parents_done, 0);
#endif

#ifdef OBJPROFILE
for (int g = 0; g < 3; g++) {
htable_new(&obj_counts[g], 0);
htable_new(&obj_sizes[g], 0);
}
#endif

#ifdef GC_FINAL_STATS
process_t0 = jl_hrtime();
#endif
src/gc.c (96 changes: 44 additions & 52 deletions)
@@ -861,8 +861,6 @@ STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o,
gc_queue_big_marked(ptls, hdr, 1);
}
}
objprofile_count(jl_typeof(jl_valueof(o)),
mark_mode == GC_OLD_MARKED, hdr->sz);
}

// This function should be called exactly once during marking for each pool
@@ -884,8 +882,6 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o,
page->has_young = 1;
}
}
objprofile_count(jl_typeof(jl_valueof(o)),
mark_mode == GC_OLD_MARKED, page->osize);
page->has_marked = 1;
#endif
}
@@ -1528,9 +1524,16 @@ static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_
gc_page_profile_write_to_file(s);
gc_update_page_fragmentation_data(pg);
gc_time_count_page(freedall, pg_skpd);
jl_ptls_t ptls = gc_all_tls_states[pg->thread_n];
jl_atomic_fetch_add(&ptls->gc_num.pool_live_bytes, GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize);
jl_atomic_fetch_add((_Atomic(int64_t) *)&gc_num.freed, (nfree - old_nfree) * osize);
jl_ptls_t ptls = jl_current_task->ptls;
// Note that we aggregate the `pool_live_bytes` over all threads before returning this
// value to the user. It doesn't matter how the `pool_live_bytes` are partitioned among
// the threads as long as the sum is correct. Let's add the `pool_live_bytes` to the current thread
// instead of adding it to the thread that originally allocated the page, so we can avoid
// an atomic-fetch-add here.
size_t delta = (GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize);
jl_atomic_store_relaxed(&ptls->gc_num.pool_live_bytes,
jl_atomic_load_relaxed(&ptls->gc_num.pool_live_bytes) + delta);
jl_atomic_fetch_add_relaxed((_Atomic(int64_t) *)&gc_num.freed, (nfree - old_nfree) * osize);
}

// the actual sweeping over all allocated pages in a memory pool
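The new comment in `gc_sweep_page` above describes a standard scalable-counter layout: each thread owns a slot that only it ever writes, so the owner can use a relaxed load/store pair instead of a contended atomic fetch-add, and the reader sums all slots. A C11 sketch of that layout (thread count and names are illustrative):

```c
#include <stdatomic.h>
#include <stdint.h>

#define N_THREADS 8
static _Atomic int64_t pool_live_bytes[N_THREADS];

// Owner thread `tid`: the read-modify-write needs no fetch-add because no
// other thread ever writes this slot; relaxed atomics keep accesses tear-free.
static void add_live_bytes(int tid, int64_t delta) {
    int64_t cur = atomic_load_explicit(&pool_live_bytes[tid], memory_order_relaxed);
    atomic_store_explicit(&pool_live_bytes[tid], cur + delta, memory_order_relaxed);
}

// Reader: the per-thread partition is arbitrary; only the sum is meaningful.
static int64_t total_live_bytes(void) {
    int64_t sum = 0;
    for (int i = 0; i < N_THREADS; i++)
        sum += atomic_load_explicit(&pool_live_bytes[i], memory_order_relaxed);
    return sum;
}
```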
@@ -1632,9 +1635,11 @@ void gc_sweep_wake_all(jl_ptls_t ptls, jl_gc_padded_page_stack_t *new_gc_allocd_
if (parallel_sweep_worthwhile && !page_profile_enabled) {
jl_atomic_store(&gc_allocd_scratch, new_gc_allocd_scratch);
uv_mutex_lock(&gc_threads_lock);
for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) {
int first = gc_first_parallel_collector_thread_id();
int last = gc_last_parallel_collector_thread_id();
for (int i = first; i <= last; i++) {
jl_ptls_t ptls2 = gc_all_tls_states[i];
assert(ptls2 != NULL); // should be a GC thread
gc_check_ptls_of_parallel_collector_thread(ptls2);
jl_atomic_fetch_add(&ptls2->gc_sweeps_requested, 1);
}
uv_cond_broadcast(&gc_threads_cond);
@@ -1646,9 +1651,11 @@
// collecting a page profile.
// wait for all to leave in order to ensure that a straggler doesn't
// try to enter sweeping after we set `gc_allocd_scratch` below.
for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) {
int first = gc_first_parallel_collector_thread_id();
int last = gc_last_parallel_collector_thread_id();
for (int i = first; i <= last; i++) {
jl_ptls_t ptls2 = gc_all_tls_states[i];
assert(ptls2 != NULL); // should be a GC thread
gc_check_ptls_of_parallel_collector_thread(ptls2);
while (jl_atomic_load_acquire(&ptls2->gc_sweeps_requested) != 0) {
jl_cpu_pause();
}
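The hunks above and several below replace open-coded `gc_first_tid + jl_n_markthreads` arithmetic with named helpers, and swap the bare `assert(ptls2 != NULL)` for `gc_check_ptls_of_parallel_collector_thread`. The helpers' definitions are not part of this diff; a plausible shape, assuming the parallel collector threads occupy a contiguous tid range (the constants below are illustrative):

```c
#include <assert.h>

static int gc_first_tid = 6;     // illustrative values; set during thread startup
static int jl_n_markthreads = 2;

static inline int gc_first_parallel_collector_thread_id(void) {
    return gc_first_tid;
}

static inline int gc_last_parallel_collector_thread_id(void) {
    // Inclusive bound, which is why the new loops iterate with `i <= last`.
    return gc_first_tid + jl_n_markthreads - 1;
}

static inline int gc_ith_parallel_collector_thread_id(int i) {
    assert(0 <= i && i < jl_n_markthreads);
    return gc_first_tid + i;
}
```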
@@ -2666,8 +2673,6 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
size_t dtsz = l * sizeof(void *) + sizeof(jl_svec_t);
if (update_meta)
gc_setmark(ptls, o, bits, dtsz);
else if (foreign_alloc)
objprofile_count(jl_simplevector_type, bits == GC_OLD_MARKED, dtsz);
jl_value_t *objary_parent = new_obj;
jl_value_t **objary_begin = data;
jl_value_t **objary_end = data + l;
@@ -2678,17 +2683,13 @@
else if (vtag == jl_module_tag << 4) {
if (update_meta)
gc_setmark(ptls, o, bits, sizeof(jl_module_t));
else if (foreign_alloc)
objprofile_count(jl_module_type, bits == GC_OLD_MARKED, sizeof(jl_module_t));
jl_module_t *mb_parent = (jl_module_t *)new_obj;
uintptr_t nptr = ((mb_parent->usings.len + 1) << 2) | (bits & GC_OLD);
gc_mark_module_binding(ptls, mb_parent, nptr, bits);
}
else if (vtag == jl_task_tag << 4) {
if (update_meta)
gc_setmark(ptls, o, bits, sizeof(jl_task_t));
else if (foreign_alloc)
objprofile_count(jl_task_type, bits == GC_OLD_MARKED, sizeof(jl_task_t));
jl_task_t *ta = (jl_task_t *)new_obj;
gc_scrub_record_task(ta);
if (gc_cblist_task_scanner) {
@@ -2757,16 +2758,12 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
size_t dtsz = jl_string_len(new_obj) + sizeof(size_t) + 1;
if (update_meta)
gc_setmark(ptls, o, bits, dtsz);
else if (foreign_alloc)
objprofile_count(jl_string_type, bits == GC_OLD_MARKED, dtsz);
}
else {
jl_datatype_t *vt = ijl_small_typeof[vtag / sizeof(*ijl_small_typeof)];
size_t dtsz = jl_datatype_size(vt);
if (update_meta)
gc_setmark(ptls, o, bits, dtsz);
else if (foreign_alloc)
objprofile_count(vt, bits == GC_OLD_MARKED, dtsz);
}
return;
}
@@ -2785,9 +2782,6 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
else
gc_setmark_big(ptls, o, bits);
}
else if (foreign_alloc) {
objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_array_t));
}
if (flags.how == 0) {
void *data_ptr = (char*)a + sizeof(jl_array_t) + jl_array_ndimwords(a->flags.ndims) * sizeof(size_t);
gc_heap_snapshot_record_hidden_edge(new_obj, data_ptr, jl_array_nbytes(a), 2);
Expand All @@ -2802,8 +2796,6 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
}
else if (flags.how == 2) {
if (update_meta || foreign_alloc) {
objprofile_count(jl_malloc_tag, bits == GC_OLD_MARKED,
jl_array_nbytes(a));
gc_heap_snapshot_record_hidden_edge(new_obj, a->data, jl_array_nbytes(a), flags.pooled);
if (bits == GC_OLD_MARKED) {
ptls->gc_cache.perm_scanned_bytes += jl_array_nbytes(a);
@@ -2870,8 +2862,6 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
size_t dtsz = jl_datatype_size(vt);
if (update_meta)
gc_setmark(ptls, o, bits, dtsz);
else if (foreign_alloc)
objprofile_count(vt, bits == GC_OLD_MARKED, dtsz);
if (vt == jl_weakref_type)
return;
const jl_datatype_layout_t *layout = vt->layout;
Expand Down Expand Up @@ -2999,19 +2989,25 @@ void gc_mark_and_steal(jl_ptls_t ptls)
// since we know chunks will likely expand into a lot
// of work for the mark loop
steal : {
int first = gc_first_parallel_collector_thread_id();
int last = gc_last_parallel_collector_thread_id();
// Try to steal chunk from random GC thread
for (int i = 0; i < 4 * jl_n_markthreads; i++) {
uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_markthreads;
jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue;
int v = gc_random_parallel_collector_thread_id(ptls);
jl_ptls_t ptls2 = gc_all_tls_states[v];
gc_check_ptls_of_parallel_collector_thread(ptls2);
jl_gc_markqueue_t *mq2 = &ptls2->mark_queue;
c = gc_chunkqueue_steal_from(mq2);
if (c.cid != GC_empty_chunk) {
gc_mark_chunk(ptls, mq, &c);
goto pop;
}
}
// Sequentially walk GC threads to try to steal chunk
for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) {
jl_gc_markqueue_t *mq2 = &gc_all_tls_states[i]->mark_queue;
for (int i = first; i <= last; i++) {
jl_ptls_t ptls2 = gc_all_tls_states[i];
gc_check_ptls_of_parallel_collector_thread(ptls2);
jl_gc_markqueue_t *mq2 = &ptls2->mark_queue;
c = gc_chunkqueue_steal_from(mq2);
if (c.cid != GC_empty_chunk) {
gc_mark_chunk(ptls, mq, &c);
Expand All @@ -3028,15 +3024,19 @@ void gc_mark_and_steal(jl_ptls_t ptls)
}
// Try to steal pointer from random GC thread
for (int i = 0; i < 4 * jl_n_markthreads; i++) {
uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_markthreads;
jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue;
int v = gc_random_parallel_collector_thread_id(ptls);
jl_ptls_t ptls2 = gc_all_tls_states[v];
gc_check_ptls_of_parallel_collector_thread(ptls2);
jl_gc_markqueue_t *mq2 = &ptls2->mark_queue;
new_obj = gc_ptr_queue_steal_from(mq2);
if (new_obj != NULL)
goto mark;
}
// Sequentially walk GC threads to try to steal pointer
for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) {
jl_gc_markqueue_t *mq2 = &gc_all_tls_states[i]->mark_queue;
for (int i = first; i <= last; i++) {
jl_ptls_t ptls2 = gc_all_tls_states[i];
gc_check_ptls_of_parallel_collector_thread(ptls2);
jl_gc_markqueue_t *mq2 = &ptls2->mark_queue;
new_obj = gc_ptr_queue_steal_from(mq2);
if (new_obj != NULL)
goto mark;
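Both steal paths, for chunks and for pointers, follow the same two-phase policy: a bounded number of randomized probes first, which keeps concurrent thieves spread across victims, then a deterministic sweep of the whole `[first, last]` range so that remaining work cannot be missed. A self-contained sketch of the policy (toy queues, and a toy xorshift in place of `cong()` on `ptls->rngseed`):

```c
#include <stdint.h>

#define N_QUEUES 4
static int queues[N_QUEUES]; // toy per-worker work counters

static int steal_from(int v) {            // stand-in for gc_ptr_queue_steal_from
    return (queues[v] > 0) ? queues[v]-- : 0;
}

static uint64_t state = 0x9E3779B97F4A7C15ULL;
static uint64_t xorshift(void) {          // toy RNG, not the GC's generator
    state ^= state << 13; state ^= state >> 7; state ^= state << 17;
    return state;
}

static int try_steal(void) {
    // Phase 1: randomized probes, bounded at 4 tries per victim on average.
    for (int i = 0; i < 4 * N_QUEUES; i++)
        if (steal_from((int)(xorshift() % N_QUEUES))) return 1;
    // Phase 2: deterministic sweep guarantees progress if any work remains.
    for (int v = 0; v < N_QUEUES; v++)
        if (steal_from(v)) return 1;
    return 0;
}
```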
@@ -3096,12 +3096,13 @@ int gc_should_mark(void)
}
int tid = jl_atomic_load_relaxed(&gc_master_tid);
assert(tid != -1);
assert(gc_all_tls_states != NULL);
size_t work = gc_count_work_in_queue(gc_all_tls_states[tid]);
for (tid = gc_first_tid; tid < gc_first_tid + jl_n_markthreads; tid++) {
jl_ptls_t ptls2 = gc_all_tls_states[tid];
if (ptls2 == NULL) {
continue;
}
int first = gc_first_parallel_collector_thread_id();
int last = gc_last_parallel_collector_thread_id();
for (int i = first; i <= last; i++) {
jl_ptls_t ptls2 = gc_all_tls_states[i];
gc_check_ptls_of_parallel_collector_thread(ptls2);
work += gc_count_work_in_queue(ptls2);
}
// if there is a lot of work left, enter the mark loop
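`gc_should_mark` totals the queued mark work of the master thread and every parallel collector thread, then only recruits more markers when there is enough to share. The exact threshold lives outside this excerpt; a hedged sketch of such a gate (the constant and policy are illustrative, not gc.c's):

```c
#include <stddef.h>

// Wake another marker only if each active marker would still have a
// meaningful share of the remaining queued work.
static int should_wake_another_marker(size_t queued_work, int active_markers) {
    const size_t MIN_WORK_PER_THREAD = 64; // illustrative
    return queued_work / (size_t)(active_markers + 1) > MIN_WORK_PER_THREAD;
}
```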
@@ -3199,7 +3200,6 @@ static void gc_premark(jl_ptls_t ptls2)
void **items = remset->items;
for (size_t i = 0; i < len; i++) {
jl_value_t *item = (jl_value_t *)items[i];
objprofile_count(jl_typeof(item), 2, 0);
jl_astaggedvalue(item)->bits.gc = GC_OLD_MARKED;
}
}
@@ -3399,13 +3399,6 @@ JL_DLLEXPORT jl_gc_num_t jl_gc_num(void)
return num;
}

JL_DLLEXPORT void jl_gc_reset_stats(void)
{
gc_num.max_pause = 0;
gc_num.max_memory = 0;
gc_num.max_time_to_safepoint = 0;
}

// TODO: these were supposed to be thread local
JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT
{
@@ -3479,7 +3472,8 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
jl_ptls_t ptls_dest = ptls;
jl_gc_markqueue_t *mq_dest = mq;
if (!single_threaded_mark) {
ptls_dest = gc_all_tls_states[gc_first_tid + t_i % jl_n_markthreads];
int dest_tid = gc_ith_parallel_collector_thread_id(t_i % jl_n_markthreads);
ptls_dest = gc_all_tls_states[dest_tid];
mq_dest = &ptls_dest->mark_queue;
}
if (ptls2 != NULL) {
@@ -3563,8 +3557,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)

gc_stats_all_pool();
gc_stats_big_obj();
objprofile_printall();
objprofile_reset();
gc_num.total_allocd += gc_num.allocd;
if (!prev_sweep_full)
promoted_bytes += perm_scanned_bytes - last_perm_scanned_bytes;