From d7391e2befebc6787b0d618cf1a20dc0475cac91 Mon Sep 17 00:00:00 2001 From: Nick Robinson Date: Mon, 19 Aug 2024 12:25:57 +0100 Subject: [PATCH 01/10] record time threads started --- base/timing.jl | 4 ++++ src/threading.c | 14 +++++++++++++- src/threading.h | 1 + 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/base/timing.jl b/base/timing.jl index bdbb32936b56f..e09d2cdbde2cd 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -114,6 +114,10 @@ function jit_total_bytes() return ccall(:jl_jit_total_bytes, Csize_t, ()) end +# eventually this should give user CPU time (in nanoseconds), but for now it's just the +# time since Julia threads were started +user_cpu_time() = ccall(:jl_user_cpu_time, UInt64, ()) + # print elapsed time, return expression value const _mem_units = ["byte", "KiB", "MiB", "GiB", "TiB", "PiB"] const _cnt_units = ["", " k", " M", " G", " T", " P"] diff --git a/src/threading.c b/src/threading.c index 8f350d41f64b1..b095958299df8 100644 --- a/src/threading.c +++ b/src/threading.c @@ -49,6 +49,18 @@ JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled = 0; JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time = 0; JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time = 0; +static uint64_t jl_thread_start_time; +void jl_set_thread_start_time(void) +{ + jl_thread_start_time = jl_hrtime(); +} + +JL_DLLEXPORT uint64_t jl_user_cpu_time(void) +{ + uint64_t t = jl_hrtime(); + return t - jl_thread_start_time; // - jl_gc_total_hrtime(); +} + JL_DLLEXPORT void *jl_get_ptls_states(void) { // mostly deprecated: use current_task instead @@ -765,6 +777,7 @@ void jl_start_threads(void) } uv_thread_detach(&uvtid); } + jl_set_thread_start_time(); } _Atomic(unsigned) _threadedregion; // HACK: keep track of whether to prioritize IO or threading @@ -919,7 +932,6 @@ void _jl_mutex_unlock(jl_task_t *self, jl_mutex_t *lock) } } - // Make gc alignment available for threading // see threads.jl alignment JL_DLLEXPORT int jl_alignment(size_t sz) diff --git a/src/threading.h b/src/threading.h index cb26537699713..b6dfe95e08933 100644 --- a/src/threading.h +++ b/src/threading.h @@ -30,6 +30,7 @@ void jl_init_threadinginfra(void); void jl_parallel_gc_threadfun(void *arg); void jl_concurrent_gc_threadfun(void *arg); void jl_threadfun(void *arg); +void jl_set_thread_start_time(void); #ifdef __cplusplus } From 4cbb97fa3042168f0e2f7bc1b65f9bec869fd086 Mon Sep 17 00:00:00 2001 From: Nick Robinson Date: Mon, 19 Aug 2024 18:15:48 +0100 Subject: [PATCH 02/10] minus total gc time --- src/gc.h | 1 + src/threading.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gc.h b/src/gc.h index b4d421c708547..8e06ae1f1c3cf 100644 --- a/src/gc.h +++ b/src/gc.h @@ -573,6 +573,7 @@ void gc_sweep_pool_parallel(jl_ptls_t ptls); void gc_free_pages(void); void sweep_stack_pools(void); void jl_gc_debug_init(void); +uint64_t jl_gc_total_hrtime(void); // GC pages diff --git a/src/threading.c b/src/threading.c index b095958299df8..295a17bdf73f3 100644 --- a/src/threading.c +++ b/src/threading.c @@ -9,6 +9,7 @@ #include "julia.h" #include "julia_internal.h" #include "julia_assert.h" +#include "gc.h" #ifdef USE_ITTAPI #include "ittapi/ittnotify.h" @@ -55,10 +56,12 @@ void jl_set_thread_start_time(void) jl_thread_start_time = jl_hrtime(); } +// TODO: does this include interactive threads? should it? JL_DLLEXPORT uint64_t jl_user_cpu_time(void) { uint64_t t = jl_hrtime(); - return t - jl_thread_start_time; // - jl_gc_total_hrtime(); + // return t - jl_thread_start_time; + return t - jl_thread_start_time - jl_gc_total_hrtime(); } JL_DLLEXPORT void *jl_get_ptls_states(void) From 644fd2b1e0f4f9dedf5570ff47e54e69dcea1130 Mon Sep 17 00:00:00 2001 From: Nick Robinson Date: Wed, 21 Aug 2024 18:25:30 +0100 Subject: [PATCH 03/10] WIP timing GC and JL_LOCK --- base/locks-mt.jl | 4 ++++ src/gc.c | 6 ++++-- src/julia_threads.h | 9 +++++++++ src/safepoint.c | 6 +++++- src/threading.c | 24 ++++++++++++++++++++++++ 5 files changed, 46 insertions(+), 3 deletions(-) diff --git a/base/locks-mt.jl b/base/locks-mt.jl index 5d355b9ed200c..f5487b141726e 100644 --- a/base/locks-mt.jl +++ b/base/locks-mt.jl @@ -39,8 +39,12 @@ end Base.assert_havelock(l::SpinLock) = islocked(l) ? nothing : Base.concurrency_violation() function lock(l::SpinLock) + t0 = time_ns() + # ccall(:jl_time_spin_lock_begin, Cvoid, ()) while true if @inline trylock(l) + # ccall(:jl_time_spin_lock_end, Cvoid, ()) + ccall(:jl_record_lock_spin_time, Cvoid, (Culonglong,), time_ns() - t0) return end ccall(:jl_cpu_suspend, Cvoid, ()) diff --git a/src/gc.c b/src/gc.c index 4cb48ba72dfe1..c0dccc8758bf7 100644 --- a/src/gc.c +++ b/src/gc.c @@ -3741,7 +3741,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) int8_t old_state = jl_atomic_load_relaxed(&ptls->gc_state); jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING); // `jl_safepoint_start_gc()` makes sure only one thread can run the GC. - uint64_t t0 = jl_hrtime(); + uint64_t t0 = jl_hrtime(); // time we entered GC if (!jl_safepoint_start_gc()) { // either another thread is running GC, or the GC got disabled just now. jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING); @@ -3770,7 +3770,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_gc_wait_for_the_world(gc_all_tls_states, gc_n_threads); JL_PROBE_GC_STOP_THE_WORLD(); - uint64_t t1 = jl_hrtime(); + uint64_t t1 = jl_hrtime(); // time user code stopped running uint64_t duration = t1 - t0; if (duration > gc_num.max_time_to_safepoint) gc_num.max_time_to_safepoint = duration; @@ -3798,6 +3798,8 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_safepoint_end_gc(); jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING); JL_PROBE_GC_END(); + // Time how long GC took. + ptls->timing_tls.gc_time += jl_hrtime() - t1; // Only disable finalizers on current thread // Doing this on all threads is racy (it's impossible to check diff --git a/src/julia_threads.h b/src/julia_threads.h index 3d06d380d10c2..c0298861e54c6 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -124,6 +124,14 @@ typedef struct { struct _jl_bt_element_t; +typedef struct { + uint64_t start_time; + uint64_t sleep_time; + uint64_t lock_spin_time; + /* uint64_t lock_spin_entry; // needed to time Threads.SpinLock */ + uint64_t gc_time; +} jl_timing_tls_states_t; + // This includes all the thread local states we care about for a thread. // Changes to TLS field types must be reflected in codegen. #define JL_MAX_BT_SIZE 80000 @@ -155,6 +163,7 @@ typedef struct _jl_tls_states_t { // Counter to disable finalizer **on the current thread** int finalizers_inhibited; jl_gc_tls_states_t gc_tls; // this is very large, and the offset of the first member is baked into codegen + jl_timing_tls_states_t timing_tls; volatile sig_atomic_t defer_signal; _Atomic(struct _jl_task_t*) current_task; struct _jl_task_t *next_task; diff --git a/src/safepoint.c b/src/safepoint.c index c6f9a42059d1a..17fdd8c5263c1 100644 --- a/src/safepoint.c +++ b/src/safepoint.c @@ -161,7 +161,10 @@ void jl_safepoint_wait_gc(void) jl_task_t *ct = jl_current_task; (void)ct; JL_TIMING_SUSPEND_TASK(GC_SAFEPOINT, ct); // The thread should have set this is already - assert(jl_atomic_load_relaxed(&ct->ptls->gc_state) != 0); + jl_ptls_t ptls = ct->ptls; + assert(jl_atomic_load_relaxed(&ptls->gc_state) != 0); + // Time how long this thread is stopped while GC is running. + uint64_t t0 = jl_hrtime(); // Use normal volatile load in the loop for speed until GC finishes. // Then use an acquire load to make sure the GC result is visible on this thread. while (jl_atomic_load_relaxed(&jl_gc_running) || jl_atomic_load_acquire(&jl_gc_running)) { @@ -173,6 +176,7 @@ void jl_safepoint_wait_gc(void) uv_cond_wait(&safepoint_cond, &safepoint_lock); uv_mutex_unlock(&safepoint_lock); } + ptls->timing_tls.gc_time = jl_hrtime() - t0; } void jl_safepoint_enable_sigint(void) diff --git a/src/threading.c b/src/threading.c index 295a17bdf73f3..5509588ae5a6f 100644 --- a/src/threading.c +++ b/src/threading.c @@ -810,6 +810,26 @@ JL_DLLEXPORT void jl_exit_threaded_region(void) // Profiling stubs +JL_DLLEXPORT void jl_record_lock_spin_time(uint64_t time) JL_NOTSAFEPOINT +{ + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + ptls->timing_tls.lock_spin_time += time; +} + +/* void jl_time_spin_lock_begin(void) */ +/* { */ +/* jl_task_t *ct = jl_current_task; */ +/* jl_ptls_t ptls = ct->ptls; */ +/* ptls->timing_tls.user_lock_spin_entry = jl_hrtime(); */ +/* } */ +/* void jl_time_spin_lock_end(void) */ +/* { */ +/* jl_task_t *ct = jl_current_task; */ +/* jl_ptls_t ptls = ct->ptls; */ +/* ptls->timing_tls.user_lock_spin_time = jl_hrtime() - ptls->timing_tls.user_lock_spin_entry; */ +/* } */ + void _jl_mutex_init(jl_mutex_t *lock, const char *name) JL_NOTSAFEPOINT { jl_atomic_store_relaxed(&lock->owner, (jl_task_t*)NULL); @@ -832,10 +852,14 @@ void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint) return; } JL_TIMING(LOCK_SPIN, LOCK_SPIN); + /* ptls = self->ptls; */ + uint64_t t0 = jl_hrtime(); while (1) { if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) { lock->count = 1; jl_profile_lock_acquired(lock); + /* ptls->timing_tls.lock_spin_time += jl_hrtime() - t0; */ + jl_record_lock_spin_time(jl_hrtime() - t0); return; } if (safepoint) { From be78b81304199b93eb2010074281ca2f5edc32e8 Mon Sep 17 00:00:00 2001 From: Nick Robinson Date: Thu, 22 Aug 2024 15:05:25 +0100 Subject: [PATCH 04/10] WIP up time and user time functions --- base/timing.jl | 4 +++- src/gc.h | 1 - src/threading.c | 15 +++++++++------ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/base/timing.jl b/base/timing.jl index e09d2cdbde2cd..7f89ba8d2ffcb 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -116,7 +116,9 @@ end # eventually this should give user CPU time (in nanoseconds), but for now it's just the # time since Julia threads were started -user_cpu_time() = ccall(:jl_user_cpu_time, UInt64, ()) +# user_cpu_time() = ccall(:jl_user_cpu_time, UInt64, ()) +thread_up_time() = ccall(:jl_thread_up_time, UInt64, ()) +thread_user_time() = ccall(:jl_thread_user_time, UInt64, ()) # print elapsed time, return expression value const _mem_units = ["byte", "KiB", "MiB", "GiB", "TiB", "PiB"] diff --git a/src/gc.h b/src/gc.h index 8e06ae1f1c3cf..b4d421c708547 100644 --- a/src/gc.h +++ b/src/gc.h @@ -573,7 +573,6 @@ void gc_sweep_pool_parallel(jl_ptls_t ptls); void gc_free_pages(void); void sweep_stack_pools(void); void jl_gc_debug_init(void); -uint64_t jl_gc_total_hrtime(void); // GC pages diff --git a/src/threading.c b/src/threading.c index 5509588ae5a6f..2154bb793b6a4 100644 --- a/src/threading.c +++ b/src/threading.c @@ -9,7 +9,6 @@ #include "julia.h" #include "julia_internal.h" #include "julia_assert.h" -#include "gc.h" #ifdef USE_ITTAPI #include "ittapi/ittnotify.h" @@ -56,12 +55,16 @@ void jl_set_thread_start_time(void) jl_thread_start_time = jl_hrtime(); } -// TODO: does this include interactive threads? should it? -JL_DLLEXPORT uint64_t jl_user_cpu_time(void) +// TODO: not just current thread +JL_DLLEXPORT uint64_t jl_thread_up_time(void) { - uint64_t t = jl_hrtime(); - // return t - jl_thread_start_time; - return t - jl_thread_start_time - jl_gc_total_hrtime(); + return jl_hrtime() - jl_thread_start_time; +} +JL_DLLEXPORT uint64_t jl_thread_user_time(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + jl_timing_tls_states_t *timing = &ptls->timing_tls; + return jl_thread_up_time() - timing->gc_time - timing->lock_spin_time; } JL_DLLEXPORT void *jl_get_ptls_states(void) From 14099bbdf32f95859726b97b1a41823eb704b383 Mon Sep 17 00:00:00 2001 From: Nick Robinson Date: Wed, 28 Aug 2024 12:29:32 +0100 Subject: [PATCH 05/10] WIP time task-switch and sleep --- src/julia_threads.h | 1 + src/partr.c | 13 +++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/julia_threads.h b/src/julia_threads.h index c0298861e54c6..d3671e2ddfec9 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -127,6 +127,7 @@ struct _jl_bt_element_t; typedef struct { uint64_t start_time; uint64_t sleep_time; + uint64_t scheduler_time; uint64_t lock_spin_time; /* uint64_t lock_spin_entry; // needed to time Threads.SpinLock */ uint64_t gc_time; diff --git a/src/partr.c b/src/partr.c index 33631dc83c05a..b4b0b520c3f86 100644 --- a/src/partr.c +++ b/src/partr.c @@ -389,12 +389,14 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, { jl_task_t *ct = jl_current_task; uint64_t start_cycles = 0; - + uint64_t t0 = jl_hrtime(); while (1) { + jl_ptls_t ptls = ct->ptls; jl_task_t *task = get_next_task(trypoptask, q); - if (task) + if (task) { + ptls->timing_tls.scheduler_time += jl_hrtime() - t0; return task; - + } // quick, race-y check to see if there seems to be any stuff in there jl_cpu_pause(); if (!check_empty(checkempty)) { @@ -403,7 +405,6 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, } jl_cpu_pause(); - jl_ptls_t ptls = ct->ptls; if (sleep_check_after_threshold(&start_cycles) || (ptls->tid == 0 && (!jl_atomic_load_relaxed(&_threadedregion) || wait_empty))) { // acquire sleep-check lock jl_atomic_store_relaxed(&ptls->sleep_check_state, sleeping); @@ -425,6 +426,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls); } if (task) + ptls->timing_tls.scheduler_time += jl_hrtime() - t0; return task; continue; } @@ -433,6 +435,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls); } + ptls->timing_tls.scheduler_time += jl_hrtime() - t0; return task; } @@ -507,6 +510,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, // the other threads will just wait for an individual wake signal to resume JULIA_DEBUG_SLEEPWAKE( ptls->sleep_enter = cycleclock() ); + uint64_t tsleep0 = jl_hrtime(); int8_t gc_state = jl_gc_safe_enter(ptls); uv_mutex_lock(&ptls->sleep_lock); while (may_sleep(ptls)) { @@ -523,6 +527,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping); uv_mutex_unlock(&ptls->sleep_lock); JULIA_DEBUG_SLEEPWAKE( ptls->sleep_leave = cycleclock() ); + ptls->timing_tls.sleep_time += jl_hrtime() - tsleep0; jl_gc_safe_leave(ptls, gc_state); // contains jl_gc_safepoint start_cycles = 0; if (task) { From 138385a614908eebeb23a8ffeb21c76e96234bdb Mon Sep 17 00:00:00 2001 From: Nick Robinson Date: Wed, 28 Aug 2024 12:29:44 +0100 Subject: [PATCH 06/10] remove unused code --- base/locks-mt.jl | 2 -- src/julia_threads.h | 1 - src/threading.c | 15 --------------- 3 files changed, 18 deletions(-) diff --git a/base/locks-mt.jl b/base/locks-mt.jl index f5487b141726e..4337a6213eccc 100644 --- a/base/locks-mt.jl +++ b/base/locks-mt.jl @@ -40,10 +40,8 @@ Base.assert_havelock(l::SpinLock) = islocked(l) ? nothing : Base.concurrency_vio function lock(l::SpinLock) t0 = time_ns() - # ccall(:jl_time_spin_lock_begin, Cvoid, ()) while true if @inline trylock(l) - # ccall(:jl_time_spin_lock_end, Cvoid, ()) ccall(:jl_record_lock_spin_time, Cvoid, (Culonglong,), time_ns() - t0) return end diff --git a/src/julia_threads.h b/src/julia_threads.h index d3671e2ddfec9..19d8711352205 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -129,7 +129,6 @@ typedef struct { uint64_t sleep_time; uint64_t scheduler_time; uint64_t lock_spin_time; - /* uint64_t lock_spin_entry; // needed to time Threads.SpinLock */ uint64_t gc_time; } jl_timing_tls_states_t; diff --git a/src/threading.c b/src/threading.c index 2154bb793b6a4..902f0b55e2eb9 100644 --- a/src/threading.c +++ b/src/threading.c @@ -820,19 +820,6 @@ JL_DLLEXPORT void jl_record_lock_spin_time(uint64_t time) JL_NOTSAFEPOINT ptls->timing_tls.lock_spin_time += time; } -/* void jl_time_spin_lock_begin(void) */ -/* { */ -/* jl_task_t *ct = jl_current_task; */ -/* jl_ptls_t ptls = ct->ptls; */ -/* ptls->timing_tls.user_lock_spin_entry = jl_hrtime(); */ -/* } */ -/* void jl_time_spin_lock_end(void) */ -/* { */ -/* jl_task_t *ct = jl_current_task; */ -/* jl_ptls_t ptls = ct->ptls; */ -/* ptls->timing_tls.user_lock_spin_time = jl_hrtime() - ptls->timing_tls.user_lock_spin_entry; */ -/* } */ - void _jl_mutex_init(jl_mutex_t *lock, const char *name) JL_NOTSAFEPOINT { jl_atomic_store_relaxed(&lock->owner, (jl_task_t*)NULL); @@ -855,13 +842,11 @@ void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint) return; } JL_TIMING(LOCK_SPIN, LOCK_SPIN); - /* ptls = self->ptls; */ uint64_t t0 = jl_hrtime(); while (1) { if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) { lock->count = 1; jl_profile_lock_acquired(lock); - /* ptls->timing_tls.lock_spin_time += jl_hrtime() - t0; */ jl_record_lock_spin_time(jl_hrtime() - t0); return; } From e0558bbdc86ea8f8b024c6a1dcb7ab1bdbc3d415 Mon Sep 17 00:00:00 2001 From: Nick Robinson Date: Wed, 28 Aug 2024 13:27:23 +0100 Subject: [PATCH 07/10] WIP per-thread stats in julia --- base/timing.jl | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/base/timing.jl b/base/timing.jl index 7f89ba8d2ffcb..c7a5d59e2f99e 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -114,11 +114,24 @@ function jit_total_bytes() return ccall(:jl_jit_total_bytes, Csize_t, ()) end -# eventually this should give user CPU time (in nanoseconds), but for now it's just the -# time since Julia threads were started -# user_cpu_time() = ccall(:jl_user_cpu_time, UInt64, ()) thread_up_time() = ccall(:jl_thread_up_time, UInt64, ()) thread_user_time() = ccall(:jl_thread_user_time, UInt64, ()) +# thread_user_time(tid::Integer) = ccall(:jl_thread_user_time, UInt64, (Cint,), Cint(tid)) +# function thread_user_time(pool::Symbol=:all) +# total = UInt64(0) +# pool in (:all, :default, :interactive) || throw(ArgumentError("invalid threadpool specified")) +# if pool === :all || pool === :default +# for tid in Threads.threadpooltids(:default) +# total += thread_user_time(tid) +# end +# end +# if pool === :all || pool === :interactive +# for tid in Threads.threadpooltids(:interactive) +# total += thread_user_time(tid) +# end +# end +# return total +# end # print elapsed time, return expression value const _mem_units = ["byte", "KiB", "MiB", "GiB", "TiB", "PiB"] From fb6597bcc90124be0b4aeceeaa112a954684123c Mon Sep 17 00:00:00 2001 From: Nick Robinson Date: Mon, 16 Sep 2024 16:44:51 +0100 Subject: [PATCH 08/10] fixup! WIP per-thread stats in julia --- base/timing.jl | 41 ++++++++++++++++++++++++----------------- src/threading.c | 4 ++-- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/base/timing.jl b/base/timing.jl index c7a5d59e2f99e..fba02e1a6ef0d 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -115,23 +115,30 @@ function jit_total_bytes() end thread_up_time() = ccall(:jl_thread_up_time, UInt64, ()) -thread_user_time() = ccall(:jl_thread_user_time, UInt64, ()) -# thread_user_time(tid::Integer) = ccall(:jl_thread_user_time, UInt64, (Cint,), Cint(tid)) -# function thread_user_time(pool::Symbol=:all) -# total = UInt64(0) -# pool in (:all, :default, :interactive) || throw(ArgumentError("invalid threadpool specified")) -# if pool === :all || pool === :default -# for tid in Threads.threadpooltids(:default) -# total += thread_user_time(tid) -# end -# end -# if pool === :all || pool === :interactive -# for tid in Threads.threadpooltids(:interactive) -# total += thread_user_time(tid) -# end -# end -# return total -# end + +_unsafe_thread_user_time(tid::Integer) = ccall(:jl_thread_user_time, UInt64, (Cint,), Cint(tid)) + +function thread_user_time(tid::Integer) + max_tid = Threads.threadpoolsize(:interactive) + Threads.threadpoolsize(:default) + 0 < tid ≤ max_tid || throw(ArgumentError("invalid thread id `$(tid)`")) + return _unsafe_thread_user_time(tid) +end + +function thread_user_time(pool::Symbol=:all) + total = UInt64(0) + pool in (:all, :default, :interactive) || throw(ArgumentError("invalid threadpool `$(repr(pool))`")) + if pool === :all || pool === :default + for tid in Threads.threadpooltids(:default) + total += _unsafe_thread_user_time(tid) + end + end + if pool === :all || pool === :interactive + for tid in Threads.threadpooltids(:interactive) + total += _unsafe_thread_user_time(tid) + end + end + return total +end # print elapsed time, return expression value const _mem_units = ["byte", "KiB", "MiB", "GiB", "TiB", "PiB"] diff --git a/src/threading.c b/src/threading.c index 902f0b55e2eb9..bfe53ef684d07 100644 --- a/src/threading.c +++ b/src/threading.c @@ -60,9 +60,9 @@ JL_DLLEXPORT uint64_t jl_thread_up_time(void) { return jl_hrtime() - jl_thread_start_time; } -JL_DLLEXPORT uint64_t jl_thread_user_time(void) +JL_DLLEXPORT uint64_t jl_thread_user_time(uint8_t tid) { - jl_ptls_t ptls = jl_current_task->ptls; + jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; jl_timing_tls_states_t *timing = &ptls->timing_tls; return jl_thread_up_time() - timing->gc_time - timing->lock_spin_time; } From 3484567cb92c40a83cf1337c49b368b1f117b2d9 Mon Sep 17 00:00:00 2001 From: Nick Robinson Date: Mon, 16 Sep 2024 16:48:36 +0100 Subject: [PATCH 09/10] Take out half-done spin time for now --- base/locks-mt.jl | 4 ++-- src/julia_threads.h | 2 +- src/threading.c | 18 +++++++++--------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/base/locks-mt.jl b/base/locks-mt.jl index 4337a6213eccc..5c5db758787ef 100644 --- a/base/locks-mt.jl +++ b/base/locks-mt.jl @@ -39,10 +39,10 @@ end Base.assert_havelock(l::SpinLock) = islocked(l) ? nothing : Base.concurrency_violation() function lock(l::SpinLock) - t0 = time_ns() + # t0 = time_ns() while true if @inline trylock(l) - ccall(:jl_record_lock_spin_time, Cvoid, (Culonglong,), time_ns() - t0) + # ccall(:jl_record_lock_spin_time, Cvoid, (Culonglong,), time_ns() - t0) return end ccall(:jl_cpu_suspend, Cvoid, ()) diff --git a/src/julia_threads.h b/src/julia_threads.h index 19d8711352205..63f0e457b71c7 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -128,7 +128,7 @@ typedef struct { uint64_t start_time; uint64_t sleep_time; uint64_t scheduler_time; - uint64_t lock_spin_time; + /* uint64_t lock_spin_time; */ uint64_t gc_time; } jl_timing_tls_states_t; diff --git a/src/threading.c b/src/threading.c index bfe53ef684d07..04076f52bdf59 100644 --- a/src/threading.c +++ b/src/threading.c @@ -64,7 +64,7 @@ JL_DLLEXPORT uint64_t jl_thread_user_time(uint8_t tid) { jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; jl_timing_tls_states_t *timing = &ptls->timing_tls; - return jl_thread_up_time() - timing->gc_time - timing->lock_spin_time; + return jl_thread_up_time() - timing->gc_time; } JL_DLLEXPORT void *jl_get_ptls_states(void) @@ -813,12 +813,12 @@ JL_DLLEXPORT void jl_exit_threaded_region(void) // Profiling stubs -JL_DLLEXPORT void jl_record_lock_spin_time(uint64_t time) JL_NOTSAFEPOINT -{ - jl_task_t *ct = jl_current_task; - jl_ptls_t ptls = ct->ptls; - ptls->timing_tls.lock_spin_time += time; -} +/* JL_DLLEXPORT void jl_record_lock_spin_time(uint64_t time) JL_NOTSAFEPOINT */ +/* { */ +/* jl_task_t *ct = jl_current_task; */ +/* jl_ptls_t ptls = ct->ptls; */ +/* ptls->timing_tls.lock_spin_time += time; */ +/* } */ void _jl_mutex_init(jl_mutex_t *lock, const char *name) JL_NOTSAFEPOINT { @@ -842,12 +842,12 @@ void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint) return; } JL_TIMING(LOCK_SPIN, LOCK_SPIN); - uint64_t t0 = jl_hrtime(); + /* uint64_t t0 = jl_hrtime(); */ while (1) { if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) { lock->count = 1; jl_profile_lock_acquired(lock); - jl_record_lock_spin_time(jl_hrtime() - t0); + /* jl_record_lock_spin_time(jl_hrtime() - t0); */ return; } if (safepoint) { From 9b457b90af1afb6ab79f58cc8bf432bf500de92d Mon Sep 17 00:00:00 2001 From: Nick Robinson Date: Mon, 16 Sep 2024 17:03:15 +0100 Subject: [PATCH 10/10] Remove scheduler time from user metric --- src/threading.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/threading.c b/src/threading.c index 04076f52bdf59..ee8c753b23a2b 100644 --- a/src/threading.c +++ b/src/threading.c @@ -64,7 +64,7 @@ JL_DLLEXPORT uint64_t jl_thread_user_time(uint8_t tid) { jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; jl_timing_tls_states_t *timing = &ptls->timing_tls; - return jl_thread_up_time() - timing->gc_time; + return jl_thread_up_time() - timing->gc_time - timing->scheduler_time; } JL_DLLEXPORT void *jl_get_ptls_states(void)