From 9314239f6665f6f1eb4b2f9029ab2736480f911e Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 27 Oct 2023 16:24:41 -0600 Subject: [PATCH 01/26] Patch to log variables boxed for dynamic dispatch --- src/cgutils.cpp | 10 +++++++--- src/codegen.cpp | 9 ++++++++- src/gc-alloc-profiler.cpp | 10 ++++++++++ src/jl_exported_funcs.inc | 1 + src/julia.h | 2 ++ 5 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 8ce84acb30901..24e49a76776fe 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -999,8 +999,7 @@ static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDN return load; } -static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v, bool is_promotable=false); - +static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v, bool is_promotable=false, bool log=false); static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull); static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull) @@ -3303,7 +3302,7 @@ static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsig // dynamically-typed value is required (e.g. argument to unknown function). // if it's already a pointer it's left alone. // Returns ctx.types().T_prjlvalue -static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotable) +static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotable, bool log_box) { jl_value_t *jt = vinfo.typ; if (jt == jl_bottom_type || jt == NULL) @@ -3320,6 +3319,11 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab return vinfo.V; } + if (log_box) { + Function *F = prepare_call(jl_log_box_func); + auto call = ctx.builder.CreateCall(F, {}); + } + Value *box; if (vinfo.TIndex) { SmallBitVector skip_none; diff --git a/src/codegen.cpp b/src/codegen.cpp index a4773acb3fbea..2118a2aa1f638 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -889,6 +889,13 @@ static const auto jlegalx_func = new JuliaFunction{ AttributeSet(), None); }, }; +static const auto jl_log_box_func = new JuliaFunction{ + XSTR(jl_nhd_log_box), + [](LLVMContext &C) { + return FunctionType::get(getVoidTy(C), {}, false); + }, + nullptr, +}; static const auto jl_alloc_obj_func = new JuliaFunction{ "julia.gc_alloc_obj", [](LLVMContext &C) { @@ -4072,7 +4079,7 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *t if (theF) theArgs.push_back(theF); for (size_t i = 0; i < nargs; i++) { - Value *arg = boxed(ctx, argv[i]); + Value *arg = boxed(ctx, argv[i], true); // log the boxes theArgs.push_back(arg); } CallInst *result = ctx.builder.CreateCall(TheTrampoline, theArgs); diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp index 1bcbeb2189f5f..d1b3ce1c57a96 100644 --- a/src/gc-alloc-profiler.cpp +++ b/src/gc-alloc-profiler.cpp @@ -78,6 +78,16 @@ jl_raw_backtrace_t get_raw_backtrace() JL_NOTSAFEPOINT { extern "C" { // Needed since these functions doesn't take any arguments. +extern int num_boxes = 0; + +JL_DLLEXPORT int jl_get_num_boxes() { + return num_boxes; +} + +JL_DLLEXPORT void jl_nhd_log_box() { + num_boxes++; +} + JL_DLLEXPORT void jl_start_alloc_profile(double sample_rate) { // We only need to do this once, the first time this is called. size_t nthreads = jl_atomic_load_acquire(&jl_n_threads); diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 82bdfc1f5b7eb..1a50278011d6a 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -352,6 +352,7 @@ XX(jl_new_method_table) \ XX(jl_new_method_uninit) \ XX(jl_new_module) \ + XX(jl_get_num_boxes) \ XX(jl_new_primitivetype) \ XX(jl_new_struct) \ XX(jl_new_structt) \ diff --git a/src/julia.h b/src/julia.h index a34c1f06d0cc1..5614b7261f6e9 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1439,6 +1439,8 @@ JL_DLLEXPORT const char *jl_typename_str(jl_value_t *v) JL_NOTSAFEPOINT; JL_DLLEXPORT const char *jl_typeof_str(jl_value_t *v) JL_NOTSAFEPOINT; JL_DLLEXPORT int jl_type_morespecific(jl_value_t *a, jl_value_t *b); +JL_DLLEXPORT void jl_nhd_log_box(); + STATIC_INLINE int jl_is_dispatch_tupletype(jl_value_t *v) JL_NOTSAFEPOINT { return jl_is_datatype(v) && ((jl_datatype_t*)v)->isdispatchtuple; From 4f09c0f9e9d2baab710786e674a39c8b9846da59 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Sun, 29 Oct 2023 11:21:41 -0600 Subject: [PATCH 02/26] Add log for boxing the input args --- src/cgutils.cpp | 5 +++++ src/codegen.cpp | 16 +++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 24e49a76776fe..386a0fcb890a8 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -2671,6 +2671,7 @@ static Value *emit_arraylen_prim(jl_codectx_t &ctx, const jl_cgval_t &tinfo) } } ++EmittedArraylen; + // TODO(PR): HERE? Value *t = boxed(ctx, tinfo); Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray, emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray), @@ -2722,6 +2723,7 @@ static Value *emit_arrayptr_internal(jl_codectx_t &ctx, const jl_cgval_t &tinfo, static Value *emit_arrayptr(jl_codectx_t &ctx, const jl_cgval_t &tinfo, bool isboxed = false) { + // TODO(PR): here? Value *t = boxed(ctx, tinfo); return emit_arrayptr_internal(ctx, tinfo, decay_derived(ctx, t), AddressSpace::Loaded, isboxed); } @@ -2808,6 +2810,7 @@ static Value *emit_array_nd_index( const jl_cgval_t *argv, size_t nidxs, jl_value_t *inbounds) { ++EmittedArrayNdIndex; + // TODO(PR): here? Value *a = boxed(ctx, ainfo); Value *i = Constant::getNullValue(getSizeTy(ctx.builder.getContext())); Value *stride = ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1); @@ -3319,7 +3322,9 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab return vinfo.V; } + //printf("NHD was here instead: %d!\n", log_box); if (log_box) { + //printf("NHD log_box"); Function *F = prepare_call(jl_log_box_func); auto call = ctx.builder.CreateCall(F, {}); } diff --git a/src/codegen.cpp b/src/codegen.cpp index 2118a2aa1f638..dd34ace8d60e3 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -4079,7 +4079,7 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *t if (theF) theArgs.push_back(theF); for (size_t i = 0; i < nargs; i++) { - Value *arg = boxed(ctx, argv[i], true); // log the boxes + Value *arg = boxed(ctx, argv[i], false, true); // log the boxes theArgs.push_back(arg); } CallInst *result = ctx.builder.CreateCall(TheTrampoline, theArgs); @@ -4151,6 +4151,9 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_ jl_cgval_t arg = argv[i]; if (isboxed) { assert(at == ctx.types().T_prjlvalue && et == ctx.types().T_prjlvalue); + // Note(NHD): I don't think this needs a log: this is only boxing it if it's actually + // needed by the callee, for already resolved static dispatch, which cannot be + // avoided. argvals[idx] = boxed(ctx, arg); } else if (et->isAggregateType()) { @@ -6730,6 +6733,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret break; } } + // TODO(PR): log this box for the return values ctx.builder.CreateRet(boxed(ctx, retval)); return w; } @@ -7625,7 +7629,11 @@ static jl_llvm_functions_t } } else { - Value *argp = boxed(ctx, theArg); + // This boxes the args? + // TODO(PR): What's this? It didn't seem to have any affect on hash + // I *think* this function is also for static, already resolved dispatch, + // so there's nothing that could be avoided here, and nothing to log. + Value *argp = boxed(ctx, theArg); //, false, true); ctx.builder.CreateStore(argp, vi.boxroot); } } @@ -8003,6 +8011,7 @@ static jl_llvm_functions_t Type *retty = f->getReturnType(); switch (returninfo.cc) { case jl_returninfo_t::Boxed: + // TODO(PR): here? return values? retval = boxed(ctx, retvalinfo); // skip the gcroot on the return path break; case jl_returninfo_t::Register: @@ -8231,7 +8240,8 @@ static jl_llvm_functions_t else if (VN->getType() == ctx.types().T_prjlvalue) { // Includes the jl_is_uniontype(phiType) && !TindexN case // TODO: if convert_julia_type says it is wasted effort and to skip it, is it worth using Constant::getNullValue(ctx.types().T_prjlvalue) (dynamically)? - V = boxed(ctx, val); + // TODO(PR): This one is the boxing for hash. + V = boxed(ctx, val, false, true); } else { // must be careful to emit undef here (rather than a bitcast or From ea85f1faaa4204c888b56789bd3e2dc11ab14632 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Sun, 29 Oct 2023 12:00:20 -0600 Subject: [PATCH 03/26] Add logging for return value too, split by a string key. But this crashed... --- src/cgutils.cpp | 11 +++++----- src/codegen.cpp | 16 +++++++------- src/gc-alloc-profiler.cpp | 45 ++++++++++++++++++++++++++++++++++----- src/julia.h | 2 +- 4 files changed, 54 insertions(+), 20 deletions(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 386a0fcb890a8..99e59faaf0807 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -999,7 +999,7 @@ static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDN return load; } -static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v, bool is_promotable=false, bool log=false); +static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v, bool is_promotable=false, const char *log_reason=NULL); static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull); static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull) @@ -3305,7 +3305,7 @@ static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsig // dynamically-typed value is required (e.g. argument to unknown function). // if it's already a pointer it's left alone. // Returns ctx.types().T_prjlvalue -static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotable, bool log_box) +static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotable, const char *log_reason) { jl_value_t *jt = vinfo.typ; if (jt == jl_bottom_type || jt == NULL) @@ -3322,11 +3322,10 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab return vinfo.V; } - //printf("NHD was here instead: %d!\n", log_box); - if (log_box) { - //printf("NHD log_box"); + if (log_reason != NULL) { Function *F = prepare_call(jl_log_box_func); - auto call = ctx.builder.CreateCall(F, {}); + auto call = ctx.builder.CreateCall(F, + stringConstPtr(ctx.emission_context, ctx.builder, log_reason)); } Value *box; diff --git a/src/codegen.cpp b/src/codegen.cpp index dd34ace8d60e3..b6a364db77cb2 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -892,7 +892,7 @@ static const auto jlegalx_func = new JuliaFunction{ static const auto jl_log_box_func = new JuliaFunction{ XSTR(jl_nhd_log_box), [](LLVMContext &C) { - return FunctionType::get(getVoidTy(C), {}, false); + return FunctionType::get(getVoidTy(C), {getInt8PtrTy(C)}, false); }, nullptr, }; @@ -4079,7 +4079,7 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *t if (theF) theArgs.push_back(theF); for (size_t i = 0; i < nargs; i++) { - Value *arg = boxed(ctx, argv[i], false, true); // log the boxes + Value *arg = boxed(ctx, argv[i], false, "input to jlcall"); // log the boxes theArgs.push_back(arg); } CallInst *result = ctx.builder.CreateCall(TheTrampoline, theArgs); @@ -6733,8 +6733,8 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret break; } } - // TODO(PR): log this box for the return values - ctx.builder.CreateRet(boxed(ctx, retval)); + // NOTE(PR): log this box for the return values + ctx.builder.CreateRet(boxed(ctx, retval, false, "boxed return value")); return w; } @@ -8011,8 +8011,8 @@ static jl_llvm_functions_t Type *retty = f->getReturnType(); switch (returninfo.cc) { case jl_returninfo_t::Boxed: - // TODO(PR): here? return values? - retval = boxed(ctx, retvalinfo); // skip the gcroot on the return path + // NOTE(PR): here? return values? + retval = boxed(ctx, retvalinfo, false, "boxed return value"); // skip the gcroot on the return path break; case jl_returninfo_t::Register: if (type_is_ghost(retty)) @@ -8240,8 +8240,8 @@ static jl_llvm_functions_t else if (VN->getType() == ctx.types().T_prjlvalue) { // Includes the jl_is_uniontype(phiType) && !TindexN case // TODO: if convert_julia_type says it is wasted effort and to skip it, is it worth using Constant::getNullValue(ctx.types().T_prjlvalue) (dynamically)? - // TODO(PR): This one is the boxing for hash. - V = boxed(ctx, val, false, true); + // NOTE(PR): This one is the boxing for hash. + V = boxed(ctx, val, false, "input to jlcall"); } else { // must be careful to emit undef here (rather than a bitcast or diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp index d1b3ce1c57a96..ec2d39becc598 100644 --- a/src/gc-alloc-profiler.cpp +++ b/src/gc-alloc-profiler.cpp @@ -7,6 +7,7 @@ #include #include +#include using std::string; using std::vector; @@ -74,18 +75,52 @@ jl_raw_backtrace_t get_raw_backtrace() JL_NOTSAFEPOINT { }; } +static auto num_boxes = std::map(); + +#include +#include +#include + // == exported interface == extern "C" { // Needed since these functions doesn't take any arguments. -extern int num_boxes = 0; +JL_DLLEXPORT int jl_nhd_total_boxes() { + // sum[v for (k,v) in num_boxes] + return (int)std::accumulate( + std::begin(num_boxes), std::end(num_boxes), 0, + [](const std::size_t previous, const auto& element) + { return previous + element.second; }); +} + +JL_DLLEXPORT jl_value_t* jl_get_num_boxes_keys() { + jl_array_t* v = jl_alloc_array_1d(jl_array_any_type, num_boxes.size()); + int i = 0; + for (auto it = num_boxes.begin(); it != num_boxes.end(); ++it) { + auto key = it->first; + jl_array_ptr_set(v, i, jl_cstr_to_string(key.c_str())); + i += 1; + } + return (jl_value_t*)v; +} -JL_DLLEXPORT int jl_get_num_boxes() { - return num_boxes; +JL_DLLEXPORT int jl_get_num_boxes(const char* key) { + std::string str_key = std::string(key); + if (num_boxes.find(str_key) == num_boxes.end()) { + return 0; + } + return num_boxes.at(str_key); } -JL_DLLEXPORT void jl_nhd_log_box() { - num_boxes++; +JL_DLLEXPORT void jl_nhd_log_box(const char* key) { + std::string str_key = std::string(key); + //std::cout << "jl_nhd_log_box: " << str_key << "\n"; + // If the key doesn't exist, create it with 1: + if (num_boxes.find(str_key) == num_boxes.end()) { + num_boxes.insert(std::pair(str_key, 1)); + } else { + num_boxes[str_key]++; + } } JL_DLLEXPORT void jl_start_alloc_profile(double sample_rate) { diff --git a/src/julia.h b/src/julia.h index 5614b7261f6e9..3ea503af568d2 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1439,7 +1439,7 @@ JL_DLLEXPORT const char *jl_typename_str(jl_value_t *v) JL_NOTSAFEPOINT; JL_DLLEXPORT const char *jl_typeof_str(jl_value_t *v) JL_NOTSAFEPOINT; JL_DLLEXPORT int jl_type_morespecific(jl_value_t *a, jl_value_t *b); -JL_DLLEXPORT void jl_nhd_log_box(); +JL_DLLEXPORT void jl_nhd_log_box(const char* key); STATIC_INLINE int jl_is_dispatch_tupletype(jl_value_t *v) JL_NOTSAFEPOINT { From f535a532d7a24fc8a2098cc24f41bd708d153406 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Sun, 29 Oct 2023 12:30:28 -0600 Subject: [PATCH 04/26] Simplify to an enum --- src/cgutils.cpp | 16 ++++++++------ src/codegen.cpp | 21 ++++++++++++------ src/gc-alloc-profiler.cpp | 45 +++++++++++---------------------------- src/gc-alloc-profiler.h | 6 ++++++ src/jl_exported_funcs.inc | 1 - 5 files changed, 43 insertions(+), 46 deletions(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 99e59faaf0807..146ca110fbb72 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -999,7 +999,7 @@ static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDN return load; } -static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v, bool is_promotable=false, const char *log_reason=NULL); +static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v, bool is_promotable=false, jl_count_box_type log_reason=JL_DONT_LOG_BOX); static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull); static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull) @@ -3305,7 +3305,7 @@ static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsig // dynamically-typed value is required (e.g. argument to unknown function). // if it's already a pointer it's left alone. // Returns ctx.types().T_prjlvalue -static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotable, const char *log_reason) +static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotable, jl_count_box_type log_reason) { jl_value_t *jt = vinfo.typ; if (jt == jl_bottom_type || jt == NULL) @@ -3322,10 +3322,14 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab return vinfo.V; } - if (log_reason != NULL) { - Function *F = prepare_call(jl_log_box_func); - auto call = ctx.builder.CreateCall(F, - stringConstPtr(ctx.emission_context, ctx.builder, log_reason)); + if (log_reason != JL_DONT_LOG_BOX) { + Function *F; + if (log_reason == JL_COUNT_BOX_INPUTS) { + F = prepare_call(jl_log_box_func_INPUTS); + } else { + F = prepare_call(jl_log_box_func_RETURNS); + } + ctx.builder.CreateCall(F, {}); } Value *box; diff --git a/src/codegen.cpp b/src/codegen.cpp index b6a364db77cb2..fc34195b6a623 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -889,10 +889,17 @@ static const auto jlegalx_func = new JuliaFunction{ AttributeSet(), None); }, }; -static const auto jl_log_box_func = new JuliaFunction{ - XSTR(jl_nhd_log_box), +static const auto jl_log_box_func_INPUTS = new JuliaFunction{ + XSTR(jl_nhd_log_box_input), [](LLVMContext &C) { - return FunctionType::get(getVoidTy(C), {getInt8PtrTy(C)}, false); + return FunctionType::get(getVoidTy(C), {}, false); + }, + nullptr, +}; +static const auto jl_log_box_func_RETURNS = new JuliaFunction{ + XSTR(jl_nhd_log_box_return), + [](LLVMContext &C) { + return FunctionType::get(getVoidTy(C), {}, false); }, nullptr, }; @@ -4079,7 +4086,7 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *t if (theF) theArgs.push_back(theF); for (size_t i = 0; i < nargs; i++) { - Value *arg = boxed(ctx, argv[i], false, "input to jlcall"); // log the boxes + Value *arg = boxed(ctx, argv[i], false, JL_COUNT_BOX_INPUTS); // log the boxes theArgs.push_back(arg); } CallInst *result = ctx.builder.CreateCall(TheTrampoline, theArgs); @@ -6734,7 +6741,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret } } // NOTE(PR): log this box for the return values - ctx.builder.CreateRet(boxed(ctx, retval, false, "boxed return value")); + ctx.builder.CreateRet(boxed(ctx, retval, false, JL_COUNT_BOX_RETURNS)); return w; } @@ -8012,7 +8019,7 @@ static jl_llvm_functions_t switch (returninfo.cc) { case jl_returninfo_t::Boxed: // NOTE(PR): here? return values? - retval = boxed(ctx, retvalinfo, false, "boxed return value"); // skip the gcroot on the return path + retval = boxed(ctx, retvalinfo, false, JL_COUNT_BOX_RETURNS); // skip the gcroot on the return path break; case jl_returninfo_t::Register: if (type_is_ghost(retty)) @@ -8241,7 +8248,7 @@ static jl_llvm_functions_t // Includes the jl_is_uniontype(phiType) && !TindexN case // TODO: if convert_julia_type says it is wasted effort and to skip it, is it worth using Constant::getNullValue(ctx.types().T_prjlvalue) (dynamically)? // NOTE(PR): This one is the boxing for hash. - V = boxed(ctx, val, false, "input to jlcall"); + V = boxed(ctx, val, false, JL_COUNT_BOX_INPUTS); } else { // must be careful to emit undef here (rather than a bitcast or diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp index ec2d39becc598..29be71d225ae9 100644 --- a/src/gc-alloc-profiler.cpp +++ b/src/gc-alloc-profiler.cpp @@ -75,8 +75,6 @@ jl_raw_backtrace_t get_raw_backtrace() JL_NOTSAFEPOINT { }; } -static auto num_boxes = std::map(); - #include #include #include @@ -85,42 +83,25 @@ static auto num_boxes = std::map(); extern "C" { // Needed since these functions doesn't take any arguments. +extern int num_boxes_inputs = 0; +extern int num_boxes_returns = 0; + JL_DLLEXPORT int jl_nhd_total_boxes() { - // sum[v for (k,v) in num_boxes] - return (int)std::accumulate( - std::begin(num_boxes), std::end(num_boxes), 0, - [](const std::size_t previous, const auto& element) - { return previous + element.second; }); + return num_boxes_inputs + num_boxes_returns; } -JL_DLLEXPORT jl_value_t* jl_get_num_boxes_keys() { - jl_array_t* v = jl_alloc_array_1d(jl_array_any_type, num_boxes.size()); - int i = 0; - for (auto it = num_boxes.begin(); it != num_boxes.end(); ++it) { - auto key = it->first; - jl_array_ptr_set(v, i, jl_cstr_to_string(key.c_str())); - i += 1; - } - return (jl_value_t*)v; +JL_DLLEXPORT int jl_nhd_boxes_inputs() { + return num_boxes_inputs; } - -JL_DLLEXPORT int jl_get_num_boxes(const char* key) { - std::string str_key = std::string(key); - if (num_boxes.find(str_key) == num_boxes.end()) { - return 0; - } - return num_boxes.at(str_key); +JL_DLLEXPORT int jl_nhd_boxes_returns() { + return num_boxes_returns; } -JL_DLLEXPORT void jl_nhd_log_box(const char* key) { - std::string str_key = std::string(key); - //std::cout << "jl_nhd_log_box: " << str_key << "\n"; - // If the key doesn't exist, create it with 1: - if (num_boxes.find(str_key) == num_boxes.end()) { - num_boxes.insert(std::pair(str_key, 1)); - } else { - num_boxes[str_key]++; - } +JL_DLLEXPORT void jl_nhd_log_box_input() { + num_boxes_inputs++; +} +JL_DLLEXPORT void jl_nhd_log_box_return() { + num_boxes_returns++; } JL_DLLEXPORT void jl_start_alloc_profile(double sample_rate) { diff --git a/src/gc-alloc-profiler.h b/src/gc-alloc-profiler.h index fcd8e45caa2d8..a20c304d43021 100644 --- a/src/gc-alloc-profiler.h +++ b/src/gc-alloc-profiler.h @@ -17,6 +17,12 @@ extern "C" { // Forward-declaration to avoid dependency in header file. struct jl_raw_alloc_t; // Defined in gc-alloc-profiler.cpp +enum jl_count_box_type { + JL_DONT_LOG_BOX, + JL_COUNT_BOX_INPUTS, + JL_COUNT_BOX_RETURNS, +}; + typedef struct { struct jl_raw_alloc_t *allocs; size_t num_allocs; diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 1a50278011d6a..82bdfc1f5b7eb 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -352,7 +352,6 @@ XX(jl_new_method_table) \ XX(jl_new_method_uninit) \ XX(jl_new_module) \ - XX(jl_get_num_boxes) \ XX(jl_new_primitivetype) \ XX(jl_new_struct) \ XX(jl_new_structt) \ From 1c3d4bdac6109eafd6a80aed88625d02f2294aa1 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Sun, 29 Oct 2023 17:10:52 -0600 Subject: [PATCH 05/26] Switch to uint64 to avoid overflow --- src/gc-alloc-profiler.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp index 29be71d225ae9..3768f0a933e2c 100644 --- a/src/gc-alloc-profiler.cpp +++ b/src/gc-alloc-profiler.cpp @@ -83,17 +83,17 @@ jl_raw_backtrace_t get_raw_backtrace() JL_NOTSAFEPOINT { extern "C" { // Needed since these functions doesn't take any arguments. -extern int num_boxes_inputs = 0; -extern int num_boxes_returns = 0; +extern uint64_t num_boxes_inputs = 0; +extern uint64_t num_boxes_returns = 0; -JL_DLLEXPORT int jl_nhd_total_boxes() { +JL_DLLEXPORT uint64_t jl_nhd_total_boxes() { return num_boxes_inputs + num_boxes_returns; } -JL_DLLEXPORT int jl_nhd_boxes_inputs() { +JL_DLLEXPORT uint64_t jl_nhd_boxes_inputs() { return num_boxes_inputs; } -JL_DLLEXPORT int jl_nhd_boxes_returns() { +JL_DLLEXPORT uint64_t jl_nhd_boxes_returns() { return num_boxes_returns; } From 2a891bdc49afab07532dd922a3212ed1bcadefc7 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Sun, 29 Oct 2023 17:14:57 -0600 Subject: [PATCH 06/26] Add option to force extra allocs, to estimate impact of reducing --- src/cgutils.cpp | 3 ++- src/codegen.cpp | 4 ++-- src/gc-alloc-profiler.cpp | 28 ++++++++++++++++++++++++++-- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 146ca110fbb72..b96d054651785 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -3329,7 +3329,8 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab } else { F = prepare_call(jl_log_box_func_RETURNS); } - ctx.builder.CreateCall(F, {}); + ctx.builder.CreateCall(F, + literal_pointer_val(ctx, (jl_value_t*)jt)); } Value *box; diff --git a/src/codegen.cpp b/src/codegen.cpp index fc34195b6a623..d2f6d1cce9a47 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -892,14 +892,14 @@ static const auto jlegalx_func = new JuliaFunction{ static const auto jl_log_box_func_INPUTS = new JuliaFunction{ XSTR(jl_nhd_log_box_input), [](LLVMContext &C) { - return FunctionType::get(getVoidTy(C), {}, false); + return FunctionType::get(getVoidTy(C), {getInt8PtrTy(C)}, false); }, nullptr, }; static const auto jl_log_box_func_RETURNS = new JuliaFunction{ XSTR(jl_nhd_log_box_return), [](LLVMContext &C) { - return FunctionType::get(getVoidTy(C), {}, false); + return FunctionType::get(getVoidTy(C), {getInt8PtrTy(C)}, false); }, nullptr, }; diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp index 3768f0a933e2c..ed5c0ddf578d2 100644 --- a/src/gc-alloc-profiler.cpp +++ b/src/gc-alloc-profiler.cpp @@ -97,10 +97,34 @@ JL_DLLEXPORT uint64_t jl_nhd_boxes_returns() { return num_boxes_returns; } -JL_DLLEXPORT void jl_nhd_log_box_input() { +static float extra_allocs_rate = 0.0f; +JL_DLLEXPORT void jl_nhd_set_extra_allocs_rate(float rate) { + extra_allocs_rate = rate; +} + +JL_DLLEXPORT void jl_nhd_log_box_input(jl_datatype_t* type) { + // Randomly, with a probability of `extra_allocs_rate`, we will allocate some number of + // extra objects. This is to measure the impact of reducing the number of allocations. + // If the rate is >1, we may allocate more than once. + // We pick a random number between 0 and extra_allocs_rate, then round it, and allocate + // that many extra objects. + // TODO(PR): ... Dunno why sometimes we get an invalid type in here.... + if (jl_is_datatype(type)) { + float num_extra_allocs = extra_allocs_rate; + jl_task_t *ct = jl_current_task; \ + while (num_extra_allocs > 1) { + num_extra_allocs--; + jl_gc_alloc(ct->ptls, jl_datatype_size(type), type); + } + // For the last one, we use a random float to decide whether to allocate or not. + float sample = float(rand()) / float(RAND_MAX); + if (sample < num_extra_allocs) { + jl_gc_alloc(ct->ptls, jl_datatype_size(type), type); + } + } num_boxes_inputs++; } -JL_DLLEXPORT void jl_nhd_log_box_return() { +JL_DLLEXPORT void jl_nhd_log_box_return(jl_value_t* _type) { num_boxes_returns++; } From 9593b341e693d5ee3aba77a44083fe74b9d6524b Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Sun, 12 Nov 2023 11:56:33 -0500 Subject: [PATCH 07/26] Remove TODOs Confirmed the correct `boxed` calls to log. --- src/cgutils.cpp | 3 --- src/codegen.cpp | 18 ++++++------------ 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index b96d054651785..7e4a6236f7337 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -2671,7 +2671,6 @@ static Value *emit_arraylen_prim(jl_codectx_t &ctx, const jl_cgval_t &tinfo) } } ++EmittedArraylen; - // TODO(PR): HERE? Value *t = boxed(ctx, tinfo); Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray, emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray), @@ -2723,7 +2722,6 @@ static Value *emit_arrayptr_internal(jl_codectx_t &ctx, const jl_cgval_t &tinfo, static Value *emit_arrayptr(jl_codectx_t &ctx, const jl_cgval_t &tinfo, bool isboxed = false) { - // TODO(PR): here? Value *t = boxed(ctx, tinfo); return emit_arrayptr_internal(ctx, tinfo, decay_derived(ctx, t), AddressSpace::Loaded, isboxed); } @@ -2810,7 +2808,6 @@ static Value *emit_array_nd_index( const jl_cgval_t *argv, size_t nidxs, jl_value_t *inbounds) { ++EmittedArrayNdIndex; - // TODO(PR): here? Value *a = boxed(ctx, ainfo); Value *i = Constant::getNullValue(getSizeTy(ctx.builder.getContext())); Value *stride = ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1); diff --git a/src/codegen.cpp b/src/codegen.cpp index d2f6d1cce9a47..93fd26527b26c 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -890,14 +890,14 @@ static const auto jlegalx_func = new JuliaFunction{ None); }, }; static const auto jl_log_box_func_INPUTS = new JuliaFunction{ - XSTR(jl_nhd_log_box_input), + XSTR(jl_log_box_input), [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), {getInt8PtrTy(C)}, false); }, nullptr, }; static const auto jl_log_box_func_RETURNS = new JuliaFunction{ - XSTR(jl_nhd_log_box_return), + XSTR(jl_log_box_return), [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), {getInt8PtrTy(C)}, false); }, @@ -4086,7 +4086,8 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *t if (theF) theArgs.push_back(theF); for (size_t i = 0; i < nargs; i++) { - Value *arg = boxed(ctx, argv[i], false, JL_COUNT_BOX_INPUTS); // log the boxes + // log the boxed arguments for this call + Value *arg = boxed(ctx, argv[i], false, JL_COUNT_BOX_INPUTS); theArgs.push_back(arg); } CallInst *result = ctx.builder.CreateCall(TheTrampoline, theArgs); @@ -4158,9 +4159,6 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_ jl_cgval_t arg = argv[i]; if (isboxed) { assert(at == ctx.types().T_prjlvalue && et == ctx.types().T_prjlvalue); - // Note(NHD): I don't think this needs a log: this is only boxing it if it's actually - // needed by the callee, for already resolved static dispatch, which cannot be - // avoided. argvals[idx] = boxed(ctx, arg); } else if (et->isAggregateType()) { @@ -6740,7 +6738,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret break; } } - // NOTE(PR): log this box for the return values + // log the box for this return value ctx.builder.CreateRet(boxed(ctx, retval, false, JL_COUNT_BOX_RETURNS)); return w; } @@ -7636,11 +7634,7 @@ static jl_llvm_functions_t } } else { - // This boxes the args? - // TODO(PR): What's this? It didn't seem to have any affect on hash - // I *think* this function is also for static, already resolved dispatch, - // so there's nothing that could be avoided here, and nothing to log. - Value *argp = boxed(ctx, theArg); //, false, true); + Value *argp = boxed(ctx, theArg); ctx.builder.CreateStore(argp, vi.boxroot); } } From 3ffc50c4120c7c59d5ce5668c64c4b75f35b8c4e Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Sun, 12 Nov 2023 12:29:33 -0500 Subject: [PATCH 08/26] Remove logging for a `boxed` call This one is a (rare) boxing of a static dispatch return. --- src/codegen.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 93fd26527b26c..537766b02dced 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -8012,8 +8012,8 @@ static jl_llvm_functions_t Type *retty = f->getReturnType(); switch (returninfo.cc) { case jl_returninfo_t::Boxed: - // NOTE(PR): here? return values? - retval = boxed(ctx, retvalinfo, false, JL_COUNT_BOX_RETURNS); // skip the gcroot on the return path + // this is a boxing for a static dispatch return which happens sometimes + retval = boxed(ctx, retvalinfo); // skip the gcroot on the return path break; case jl_returninfo_t::Register: if (type_is_ghost(retty)) From 01ecaffa6a1209e6d1d2075015514f1aaff0e160 Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Sun, 12 Nov 2023 12:30:27 -0500 Subject: [PATCH 09/26] Remove unused prototype --- src/julia.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/julia.h b/src/julia.h index 3ea503af568d2..a34c1f06d0cc1 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1439,8 +1439,6 @@ JL_DLLEXPORT const char *jl_typename_str(jl_value_t *v) JL_NOTSAFEPOINT; JL_DLLEXPORT const char *jl_typeof_str(jl_value_t *v) JL_NOTSAFEPOINT; JL_DLLEXPORT int jl_type_morespecific(jl_value_t *a, jl_value_t *b); -JL_DLLEXPORT void jl_nhd_log_box(const char* key); - STATIC_INLINE int jl_is_dispatch_tupletype(jl_value_t *v) JL_NOTSAFEPOINT { return jl_is_datatype(v) && ((jl_datatype_t*)v)->isdispatchtuple; From 5d0c2c73bfa746d96bcc4ba1c93b1fd91f516406 Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Sun, 12 Nov 2023 12:43:29 -0500 Subject: [PATCH 10/26] Expand logging Log the size of the datatype being boxed, both for inputs and returns. Also, we cannot simply add in `jl_gc_alloc`s for our extra inputs as they will actually be removed in late lowering. Instead, we simply add to an extras count of boxed inputs and extras boxed input size. --- src/gc-alloc-profiler.cpp | 93 +++++++++++++++++++++++++++------------ 1 file changed, 65 insertions(+), 28 deletions(-) diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp index ed5c0ddf578d2..c15666ed1edc4 100644 --- a/src/gc-alloc-profiler.cpp +++ b/src/gc-alloc-profiler.cpp @@ -83,48 +83,85 @@ jl_raw_backtrace_t get_raw_backtrace() JL_NOTSAFEPOINT { extern "C" { // Needed since these functions doesn't take any arguments. -extern uint64_t num_boxes_inputs = 0; -extern uint64_t num_boxes_returns = 0; - -JL_DLLEXPORT uint64_t jl_nhd_total_boxes() { - return num_boxes_inputs + num_boxes_returns; +uint64_t num_boxes_inputs; +uint64_t boxed_inputs_size; +uint64_t extra_num_boxes_inputs; +uint64_t extra_boxed_inputs_size; +uint64_t num_boxes_returns; +uint64_t boxed_returns_size; + +JL_DLLEXPORT uint64_t jl_total_boxes() +{ + return num_boxes_inputs + extra_num_boxes_inputs + num_boxes_returns; } - -JL_DLLEXPORT uint64_t jl_nhd_boxes_inputs() { +JL_DLLEXPORT uint64_t jl_total_boxes_size() +{ + return boxed_inputs_size + extra_boxed_inputs_size + boxed_returns_size; +} +JL_DLLEXPORT uint64_t jl_num_boxes_inputs() +{ return num_boxes_inputs; } -JL_DLLEXPORT uint64_t jl_nhd_boxes_returns() { +JL_DLLEXPORT uint64_t jl_extra_num_boxes_inputs() +{ + return extra_num_boxes_inputs; +} +JL_DLLEXPORT uint64_t jl_boxed_inputs_size() +{ + return boxed_inputs_size; +} +JL_DLLEXPORT uint64_t jl_extra_boxed_inputs_size() +{ + return extra_boxed_inputs_size; +} +JL_DLLEXPORT uint64_t jl_num_boxes_returns() +{ return num_boxes_returns; } +JL_DLLEXPORT uint64_t jl_boxed_returns_size() +{ + return boxed_returns_size; +} static float extra_allocs_rate = 0.0f; -JL_DLLEXPORT void jl_nhd_set_extra_allocs_rate(float rate) { +JL_DLLEXPORT void jl_set_extra_allocs_rate(float rate) +{ extra_allocs_rate = rate; } -JL_DLLEXPORT void jl_nhd_log_box_input(jl_datatype_t* type) { - // Randomly, with a probability of `extra_allocs_rate`, we will allocate some number of - // extra objects. This is to measure the impact of reducing the number of allocations. - // If the rate is >1, we may allocate more than once. - // We pick a random number between 0 and extra_allocs_rate, then round it, and allocate - // that many extra objects. - // TODO(PR): ... Dunno why sometimes we get an invalid type in here.... - if (jl_is_datatype(type)) { - float num_extra_allocs = extra_allocs_rate; - jl_task_t *ct = jl_current_task; \ - while (num_extra_allocs > 1) { - num_extra_allocs--; - jl_gc_alloc(ct->ptls, jl_datatype_size(type), type); - } - // For the last one, we use a random float to decide whether to allocate or not. - float sample = float(rand()) / float(RAND_MAX); - if (sample < num_extra_allocs) { - jl_gc_alloc(ct->ptls, jl_datatype_size(type), type); +JL_DLLEXPORT void jl_log_box_input(jl_datatype_t* jt) { + // Randomly, with a probability of `extra_allocs_rate`, record some number of + // extra allocations. The goal is to estimate the impact of _reducing_ the + // number of allocations for boxing. For a rate >1, more than one allocation + // may be recorded: we pick a random number between 0 and extra_allocs_rate, + // then round it and allocate that many extra objects. + if (jl_is_datatype(jt)) { + boxed_inputs_size += jl_datatype_size(jt); + + // record extra allocs if configured + if (extra_allocs_rate > 0.0f) { + float num_extra_allocs = extra_allocs_rate; + jl_task_t *ct = jl_current_task; \ + while (num_extra_allocs > 1) { + num_extra_allocs--; + extra_num_boxes_inputs++; + extra_boxed_inputs_size += jl_datatype_size(jt); + } + // use a random float to decide whether to allocate or not for the last one + float sample = float(rand()) / float(RAND_MAX); + if (sample < num_extra_allocs) { + extra_num_boxes_inputs++; + extra_boxed_inputs_size += jl_datatype_size(jt); + } } } num_boxes_inputs++; } -JL_DLLEXPORT void jl_nhd_log_box_return(jl_value_t* _type) { +JL_DLLEXPORT void jl_log_box_return(jl_value_t* jt) +{ + if (jl_is_datatype(jt)) { + boxed_returns_size += jl_datatype_size(jt); + } num_boxes_returns++; } From eb527b1e115c5687c2464309ffc1d5c3e0dc67d8 Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Sun, 12 Nov 2023 12:46:33 -0500 Subject: [PATCH 11/26] Add a TODO comment --- src/cgutils.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 7e4a6236f7337..ca241bf0b529a 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -3319,6 +3319,7 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab return vinfo.V; } + // TODO(kp): sometimes !jl_is_datatype(jt) -- why? if (log_reason != JL_DONT_LOG_BOX) { Function *F; if (log_reason == JL_COUNT_BOX_INPUTS) { From b5863c21532ec6b62589715faf1272a0c046f2e1 Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Sun, 12 Nov 2023 13:21:49 -0500 Subject: [PATCH 12/26] Log number of dynamic dispatches for every method Also add an `options.h` macro -- JL_DISPATCH_LOG_BOXES -- to control the entire box logging feature. --- src/gf.c | 19 +++++++++++++++++++ src/julia.h | 3 +++ src/method.c | 3 +++ src/options.h | 3 +++ 4 files changed, 28 insertions(+) diff --git a/src/gf.c b/src/gf.c index 33849dd5aa387..7f338b31ccb4a 100644 --- a/src/gf.c +++ b/src/gf.c @@ -2937,9 +2937,28 @@ JL_DLLEXPORT jl_value_t *jl_apply_generic(jl_value_t *F, jl_value_t **args, uint jl_int32hash_fast(jl_return_address()), world); JL_GC_PROMISE_ROOTED(mfunc); +#ifdef JL_DISPATCH_LOG_BOXES + jl_method_t *def = mfunc->def.method; + if (jl_is_method(def)) { + def->num_dynamic_dispatches++; + } +#endif return _jl_invoke(F, args, nargs, mfunc, world); } +#ifdef JL_DISPATCH_LOG_BOXES +JL_DLLEXPORT uint64_t jl_get_num_dynamic_dispatches(jl_method_t *m) +{ + return m->num_dynamic_dispatches; +} +#else +JL_DLLEXPORT uint64_t jl_get_num_dynamic_dispatches(jl_method_t *m) +{ + jl_error("not logging"); + return 0; +} +#endif + static jl_method_match_t *_gf_invoke_lookup(jl_value_t *types JL_PROPAGATES_ROOT, jl_value_t *mt, size_t world, size_t *min_valid, size_t *max_valid) { jl_value_t *unw = jl_unwrap_unionall((jl_value_t*)types); diff --git a/src/julia.h b/src/julia.h index a34c1f06d0cc1..79667186036d3 100644 --- a/src/julia.h +++ b/src/julia.h @@ -351,6 +351,9 @@ typedef struct _jl_method_t { // hidden fields: // lock for modifications to the method jl_mutex_t writelock; +#ifdef JL_DISPATCH_LOG_BOXES + uint32_t num_dynamic_dispatches; +#endif } jl_method_t; // This type is a placeholder to cache data for a specType signature specialization of a Method diff --git a/src/method.c b/src/method.c index 4e48ef9122d79..a659b9aa44d56 100644 --- a/src/method.c +++ b/src/method.c @@ -806,6 +806,9 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module) m->is_for_opaque_closure = 0; m->constprop = 0; JL_MUTEX_INIT(&m->writelock); +#ifdef JL_DISPATCH_LOG_BOXES + m->num_dynamic_dispatches = 0; +#endif return m; } diff --git a/src/options.h b/src/options.h index 06af3e33fcbdc..ec75e11b15307 100644 --- a/src/options.h +++ b/src/options.h @@ -98,6 +98,9 @@ // profile generic (not inlined or specialized) calls to each function //#define JL_GF_PROFILE +// count boxes and box sizes for generic calls +#define JL_DISPATCH_LOG_BOXES + // task options --------------------------------------------------------------- From f5d8d0a091a828a2a4bec29d527b9ea334a14634 Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Sun, 12 Nov 2023 13:23:20 -0500 Subject: [PATCH 13/26] Put changes behind a `#ifdef` Put the box logging feature behind JL_DISPATCH_LOG_BOXES. --- src/cgutils.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index ca241bf0b529a..7c97aade05733 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -999,7 +999,11 @@ static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDN return load; } +#ifdef JL_DISPATCH_LOG_BOXES static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v, bool is_promotable=false, jl_count_box_type log_reason=JL_DONT_LOG_BOX); +#else +static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v, bool is_promotable=false); +#endif static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull); static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull) @@ -3302,7 +3306,11 @@ static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsig // dynamically-typed value is required (e.g. argument to unknown function). // if it's already a pointer it's left alone. // Returns ctx.types().T_prjlvalue +#ifdef JL_DISPATCH_LOG_BOXES static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotable, jl_count_box_type log_reason) +#else +static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotable) +#endif { jl_value_t *jt = vinfo.typ; if (jt == jl_bottom_type || jt == NULL) @@ -3319,6 +3327,7 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab return vinfo.V; } +#ifdef JL_DISPATCH_LOG_BOXES // TODO(kp): sometimes !jl_is_datatype(jt) -- why? if (log_reason != JL_DONT_LOG_BOX) { Function *F; @@ -3330,6 +3339,7 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab ctx.builder.CreateCall(F, literal_pointer_val(ctx, (jl_value_t*)jt)); } +#endif Value *box; if (vinfo.TIndex) { From 6748135f7c30cd5909e1c8b06d1eb254002ed4f0 Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Sun, 12 Nov 2023 13:35:50 -0500 Subject: [PATCH 14/26] Move feature behind `#ifdef` JL_DISPATCH_LOG_BOXES --- src/gc-alloc-profiler.cpp | 21 ++++++++------------- src/gc-alloc-profiler.h | 2 ++ 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp index c15666ed1edc4..42b0f307e9e99 100644 --- a/src/gc-alloc-profiler.cpp +++ b/src/gc-alloc-profiler.cpp @@ -7,7 +7,6 @@ #include #include -#include using std::string; using std::vector; @@ -75,10 +74,6 @@ jl_raw_backtrace_t get_raw_backtrace() JL_NOTSAFEPOINT { }; } -#include -#include -#include - // == exported interface == extern "C" { // Needed since these functions doesn't take any arguments. @@ -129,25 +124,24 @@ JL_DLLEXPORT void jl_set_extra_allocs_rate(float rate) extra_allocs_rate = rate; } +#ifdef JL_DISPATCH_LOG_BOXES JL_DLLEXPORT void jl_log_box_input(jl_datatype_t* jt) { - // Randomly, with a probability of `extra_allocs_rate`, record some number of - // extra allocations. The goal is to estimate the impact of _reducing_ the - // number of allocations for boxing. For a rate >1, more than one allocation - // may be recorded: we pick a random number between 0 and extra_allocs_rate, - // then round it and allocate that many extra objects. if (jl_is_datatype(jt)) { boxed_inputs_size += jl_datatype_size(jt); - // record extra allocs if configured + // Randomly, with a probability of `extra_allocs_rate`, record some number of + // extra allocations. The goal is to estimate the impact of _reducing_ the + // number of allocations for boxing. For a rate >1, more than one allocation + // may be recorded: we pick a random number between 0 and extra_allocs_rate, + // then round it and allocate that many extra objects. if (extra_allocs_rate > 0.0f) { float num_extra_allocs = extra_allocs_rate; - jl_task_t *ct = jl_current_task; \ while (num_extra_allocs > 1) { num_extra_allocs--; extra_num_boxes_inputs++; extra_boxed_inputs_size += jl_datatype_size(jt); } - // use a random float to decide whether to allocate or not for the last one + // decide whether or not to allocate for the last one float sample = float(rand()) / float(RAND_MAX); if (sample < num_extra_allocs) { extra_num_boxes_inputs++; @@ -164,6 +158,7 @@ JL_DLLEXPORT void jl_log_box_return(jl_value_t* jt) } num_boxes_returns++; } +#endif JL_DLLEXPORT void jl_start_alloc_profile(double sample_rate) { // We only need to do this once, the first time this is called. diff --git a/src/gc-alloc-profiler.h b/src/gc-alloc-profiler.h index a20c304d43021..e6c147913fa1d 100644 --- a/src/gc-alloc-profiler.h +++ b/src/gc-alloc-profiler.h @@ -17,11 +17,13 @@ extern "C" { // Forward-declaration to avoid dependency in header file. struct jl_raw_alloc_t; // Defined in gc-alloc-profiler.cpp +#ifdef JL_DISPATCH_LOG_BOXES enum jl_count_box_type { JL_DONT_LOG_BOX, JL_COUNT_BOX_INPUTS, JL_COUNT_BOX_RETURNS, }; +#endif typedef struct { struct jl_raw_alloc_t *allocs; From 6f56d27cae4181625901716025644952674c9cc4 Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Sun, 12 Nov 2023 14:04:05 -0500 Subject: [PATCH 15/26] Move feature under `#ifdef` JL_DISPATCH_LOG_BOXES --- src/codegen.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 537766b02dced..7368d7c543571 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -889,6 +889,7 @@ static const auto jlegalx_func = new JuliaFunction{ AttributeSet(), None); }, }; +#ifdef JL_DISPATCH_LOG_BOXES static const auto jl_log_box_func_INPUTS = new JuliaFunction{ XSTR(jl_log_box_input), [](LLVMContext &C) { @@ -903,6 +904,7 @@ static const auto jl_log_box_func_RETURNS = new JuliaFunction{ }, nullptr, }; +#endif static const auto jl_alloc_obj_func = new JuliaFunction{ "julia.gc_alloc_obj", [](LLVMContext &C) { @@ -4086,8 +4088,12 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *t if (theF) theArgs.push_back(theF); for (size_t i = 0; i < nargs; i++) { +#ifdef JL_DISPATCH_LOG_BOXES // log the boxed arguments for this call Value *arg = boxed(ctx, argv[i], false, JL_COUNT_BOX_INPUTS); +#else + Value *arg = boxed(ctx, argv[i]); +#endif theArgs.push_back(arg); } CallInst *result = ctx.builder.CreateCall(TheTrampoline, theArgs); @@ -6738,8 +6744,12 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret break; } } +#ifdef JL_DISPATCH_LOG_BOXES // log the box for this return value ctx.builder.CreateRet(boxed(ctx, retval, false, JL_COUNT_BOX_RETURNS)); +#else + ctx.builder.CreateRet(boxed(ctx, retval)); +#endif return w; } @@ -8012,7 +8022,6 @@ static jl_llvm_functions_t Type *retty = f->getReturnType(); switch (returninfo.cc) { case jl_returninfo_t::Boxed: - // this is a boxing for a static dispatch return which happens sometimes retval = boxed(ctx, retvalinfo); // skip the gcroot on the return path break; case jl_returninfo_t::Register: @@ -8241,8 +8250,12 @@ static jl_llvm_functions_t else if (VN->getType() == ctx.types().T_prjlvalue) { // Includes the jl_is_uniontype(phiType) && !TindexN case // TODO: if convert_julia_type says it is wasted effort and to skip it, is it worth using Constant::getNullValue(ctx.types().T_prjlvalue) (dynamically)? +#ifdef JL_DISPATCH_LOG_BOXES // NOTE(PR): This one is the boxing for hash. V = boxed(ctx, val, false, JL_COUNT_BOX_INPUTS); +#else + V = boxed(ctx, val); +#endif } else { // must be careful to emit undef here (rather than a bitcast or From 038b31d5564989e3e52bb8b0bb3f45832facd5f6 Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Sun, 12 Nov 2023 14:34:22 -0500 Subject: [PATCH 16/26] Include `options.h` in `julia.h` --- src/julia.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/julia.h b/src/julia.h index 79667186036d3..7ac44cbacfef3 100644 --- a/src/julia.h +++ b/src/julia.h @@ -17,6 +17,7 @@ #include #include +#include "options.h" #include "htable.h" #include "arraylist.h" #include "analyzer_annotations.h" From f728499288db03ac11d52d7b3a8429c31e4a6612 Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Tue, 14 Nov 2023 14:03:16 -0500 Subject: [PATCH 17/26] Pass the size of `vinfo` instead of its datatype To the logging functions. Also some cleanup. --- src/cgutils.cpp | 9 ++--- src/codegen.cpp | 8 ++-- src/gc-alloc-profiler.cpp | 78 ++++++++++++++++++++------------------- 3 files changed, 48 insertions(+), 47 deletions(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 7c97aade05733..9b52b330dc32f 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -1110,7 +1110,6 @@ static Value *emit_datatype_size(jl_codectx_t &ctx, Value *dt) return ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t)))); } -/* this is valid code, it's simply unused static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p) { if (p.TIndex) { @@ -1159,6 +1158,7 @@ static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p) } } +/* this is valid code, it's simply unused static Value *emit_datatype_mutabl(jl_codectx_t &ctx, Value *dt) { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); @@ -3332,12 +3332,11 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab if (log_reason != JL_DONT_LOG_BOX) { Function *F; if (log_reason == JL_COUNT_BOX_INPUTS) { - F = prepare_call(jl_log_box_func_INPUTS); + F = prepare_call(jllogboxinput_func); } else { - F = prepare_call(jl_log_box_func_RETURNS); + F = prepare_call(jllogboxreturn_func); } - ctx.builder.CreateCall(F, - literal_pointer_val(ctx, (jl_value_t*)jt)); + ctx.builder.CreateCall(F, emit_sizeof(ctx, vinfo)); } #endif diff --git a/src/codegen.cpp b/src/codegen.cpp index 7368d7c543571..a761c0494af4a 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -890,17 +890,17 @@ static const auto jlegalx_func = new JuliaFunction{ None); }, }; #ifdef JL_DISPATCH_LOG_BOXES -static const auto jl_log_box_func_INPUTS = new JuliaFunction{ +static const auto jllogboxinput_func = new JuliaFunction{ XSTR(jl_log_box_input), [](LLVMContext &C) { - return FunctionType::get(getVoidTy(C), {getInt8PtrTy(C)}, false); + return FunctionType::get(getVoidTy(C), {getSizeTy(C)}, false); }, nullptr, }; -static const auto jl_log_box_func_RETURNS = new JuliaFunction{ +static const auto jllogboxreturn_func = new JuliaFunction{ XSTR(jl_log_box_return), [](LLVMContext &C) { - return FunctionType::get(getVoidTy(C), {getInt8PtrTy(C)}, false); + return FunctionType::get(getVoidTy(C), {getSizeTy(C)}, false); }, nullptr, }; diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp index 42b0f307e9e99..300a5b7314258 100644 --- a/src/gc-alloc-profiler.cpp +++ b/src/gc-alloc-profiler.cpp @@ -78,28 +78,28 @@ jl_raw_backtrace_t get_raw_backtrace() JL_NOTSAFEPOINT { extern "C" { // Needed since these functions doesn't take any arguments. -uint64_t num_boxes_inputs; +uint64_t num_boxed_inputs; uint64_t boxed_inputs_size; -uint64_t extra_num_boxes_inputs; +uint64_t extra_num_boxed_inputs; uint64_t extra_boxed_inputs_size; -uint64_t num_boxes_returns; +uint64_t num_boxed_returns; uint64_t boxed_returns_size; JL_DLLEXPORT uint64_t jl_total_boxes() { - return num_boxes_inputs + extra_num_boxes_inputs + num_boxes_returns; + return num_boxed_inputs + extra_num_boxed_inputs + num_boxed_returns; } JL_DLLEXPORT uint64_t jl_total_boxes_size() { return boxed_inputs_size + extra_boxed_inputs_size + boxed_returns_size; } -JL_DLLEXPORT uint64_t jl_num_boxes_inputs() +JL_DLLEXPORT uint64_t jl_num_boxed_inputs() { - return num_boxes_inputs; + return num_boxed_inputs; } -JL_DLLEXPORT uint64_t jl_extra_num_boxes_inputs() +JL_DLLEXPORT uint64_t jl_extra_num_boxed_inputs() { - return extra_num_boxes_inputs; + return extra_num_boxed_inputs; } JL_DLLEXPORT uint64_t jl_boxed_inputs_size() { @@ -109,9 +109,9 @@ JL_DLLEXPORT uint64_t jl_extra_boxed_inputs_size() { return extra_boxed_inputs_size; } -JL_DLLEXPORT uint64_t jl_num_boxes_returns() +JL_DLLEXPORT uint64_t jl_num_boxed_returns() { - return num_boxes_returns; + return num_boxed_returns; } JL_DLLEXPORT uint64_t jl_boxed_returns_size() { @@ -125,38 +125,40 @@ JL_DLLEXPORT void jl_set_extra_allocs_rate(float rate) } #ifdef JL_DISPATCH_LOG_BOXES -JL_DLLEXPORT void jl_log_box_input(jl_datatype_t* jt) { - if (jl_is_datatype(jt)) { - boxed_inputs_size += jl_datatype_size(jt); - - // Randomly, with a probability of `extra_allocs_rate`, record some number of - // extra allocations. The goal is to estimate the impact of _reducing_ the - // number of allocations for boxing. For a rate >1, more than one allocation - // may be recorded: we pick a random number between 0 and extra_allocs_rate, - // then round it and allocate that many extra objects. - if (extra_allocs_rate > 0.0f) { - float num_extra_allocs = extra_allocs_rate; - while (num_extra_allocs > 1) { - num_extra_allocs--; - extra_num_boxes_inputs++; - extra_boxed_inputs_size += jl_datatype_size(jt); - } - // decide whether or not to allocate for the last one - float sample = float(rand()) / float(RAND_MAX); - if (sample < num_extra_allocs) { - extra_num_boxes_inputs++; - extra_boxed_inputs_size += jl_datatype_size(jt); - } +JL_DLLEXPORT void jl_log_box_input(size_t sz) +{ + num_boxed_inputs++; + boxed_inputs_size += sz; + + // Randomly, with a probability of `extra_allocs_rate`, record some number of + // extra allocations. The goal is to estimate the impact of _reducing_ the + // number of allocations for boxing. For a rate >1, more than one allocation + // may be recorded: we pick a random number between 0 and extra_allocs_rate, + // then round it and allocate that many extra objects. + if (extra_allocs_rate > 0.0f) { + float num_extra_allocs = extra_allocs_rate; + jl_value_t *extra_obj; + while (num_extra_allocs > 1) { + num_extra_allocs--; + extra_num_boxed_inputs++; + extra_boxed_inputs_size += sz; + extra_obj = jl_gc_allocobj(sz); + memset(extra_obj, 0, sz); + } + // decide whether or not to allocate for the last one + float sample = float(rand()) / float(RAND_MAX); + if (sample < num_extra_allocs) { + extra_num_boxed_inputs++; + extra_boxed_inputs_size += sz; + extra_obj = jl_gc_allocobj(sz); + memset(extra_obj, 0, sz); } } - num_boxes_inputs++; } -JL_DLLEXPORT void jl_log_box_return(jl_value_t* jt) +JL_DLLEXPORT void jl_log_box_return(size_t sz) { - if (jl_is_datatype(jt)) { - boxed_returns_size += jl_datatype_size(jt); - } - num_boxes_returns++; + num_boxed_returns++; + boxed_returns_size += sz; } #endif From 717300c7c085e581857500c7dad613c7922151fd Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Tue, 14 Nov 2023 15:27:08 -0500 Subject: [PATCH 18/26] Fix `emit_sizeof`'s use of `emit_typeof` --- src/cgutils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 9b52b330dc32f..6fd7553e9556a 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -1132,7 +1132,7 @@ static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p) ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); ctx.builder.CreateCondBr(isboxed, dynloadBB, postBB); ctx.builder.SetInsertPoint(dynloadBB); - Value *datatype = emit_typeof(p.V); + Value *datatype = emit_typeof(ctx, p.V, false); Value *dyn_size = emit_datatype_size(ctx, datatype); ctx.builder.CreateBr(postBB); dynloadBB = ctx.builder.GetInsertBlock(); // could have changed From fd3cc3ad213cad7c215ee0e05adc3e26dc53c952 Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Tue, 14 Nov 2023 17:43:38 -0500 Subject: [PATCH 19/26] `emit_sizeof` emits an `i32` not a `size_t` --- src/codegen.cpp | 20 ++++++++++---------- src/gc-alloc-profiler.cpp | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index a761c0494af4a..53bcded3583c5 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -891,18 +891,18 @@ static const auto jlegalx_func = new JuliaFunction{ }; #ifdef JL_DISPATCH_LOG_BOXES static const auto jllogboxinput_func = new JuliaFunction{ - XSTR(jl_log_box_input), - [](LLVMContext &C) { - return FunctionType::get(getVoidTy(C), {getSizeTy(C)}, false); - }, - nullptr, + XSTR(jl_log_box_input), + [](LLVMContext &C) { + return FunctionType::get(getVoidTy(C), {getInt32Ty(C)}, false); + }, + nullptr, }; static const auto jllogboxreturn_func = new JuliaFunction{ - XSTR(jl_log_box_return), - [](LLVMContext &C) { - return FunctionType::get(getVoidTy(C), {getSizeTy(C)}, false); - }, - nullptr, + XSTR(jl_log_box_return), + [](LLVMContext &C) { + return FunctionType::get(getVoidTy(C), {getInt32Ty(C)}, false); + }, + nullptr, }; #endif static const auto jl_alloc_obj_func = new JuliaFunction{ diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp index 300a5b7314258..060f58a97046d 100644 --- a/src/gc-alloc-profiler.cpp +++ b/src/gc-alloc-profiler.cpp @@ -125,7 +125,7 @@ JL_DLLEXPORT void jl_set_extra_allocs_rate(float rate) } #ifdef JL_DISPATCH_LOG_BOXES -JL_DLLEXPORT void jl_log_box_input(size_t sz) +JL_DLLEXPORT void jl_log_box_input(int32_t sz) { num_boxed_inputs++; boxed_inputs_size += sz; @@ -155,7 +155,7 @@ JL_DLLEXPORT void jl_log_box_input(size_t sz) } } } -JL_DLLEXPORT void jl_log_box_return(size_t sz) +JL_DLLEXPORT void jl_log_box_return(int32_t sz) { num_boxed_returns++; boxed_returns_size += sz; From 10351adc2743fcf920fff61bfa7e6fbb713b4ce8 Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Thu, 16 Nov 2023 17:39:55 +0000 Subject: [PATCH 20/26] Cannot make `num_dynamic_dispatches` a "hidden" field Unsure why but it doesn't really matter. --- src/jltypes.c | 10 ++++++---- src/julia.h | 7 ++++--- src/method.c | 2 +- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/jltypes.c b/src/jltypes.c index 0767e9493bbc0..e7c1fd9d9c9c2 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -2600,7 +2600,7 @@ void jl_init_types(void) JL_GC_DISABLED jl_method_type = jl_new_datatype(jl_symbol("Method"), core, jl_any_type, jl_emptysvec, - jl_perm_symsvec(29, + jl_perm_symsvec(30, "name", "module", "file", @@ -2629,8 +2629,9 @@ void jl_init_types(void) JL_GC_DISABLED "pure", "is_for_opaque_closure", "constprop", - "purity"), - jl_svec(29, + "purity", + "num_dynamic_dispatches"), + jl_svec(30, jl_symbol_type, jl_module_type, jl_symbol_type, @@ -2659,7 +2660,8 @@ void jl_init_types(void) JL_GC_DISABLED jl_bool_type, jl_bool_type, jl_uint8_type, - jl_uint8_type), + jl_uint8_type, + jl_int32_type), jl_emptysvec, 0, 1, 10); //const static uint32_t method_constfields[1] = { 0x03fc065f }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<6)|(1<<9)|(1<<10)|(1<<18)|(1<<19)|(1<<20)|(1<<21)|(1<<22)|(1<<23)|(1<<24)|(1<<25); diff --git a/src/julia.h b/src/julia.h index 7ac44cbacfef3..136cb5a968ba9 100644 --- a/src/julia.h +++ b/src/julia.h @@ -349,12 +349,13 @@ typedef struct _jl_method_t { // forcing the conclusion to always true. _jl_purity_overrides_t purity; -// hidden fields: - // lock for modifications to the method - jl_mutex_t writelock; #ifdef JL_DISPATCH_LOG_BOXES uint32_t num_dynamic_dispatches; #endif + +// hidden fields: + // lock for modifications to the method + jl_mutex_t writelock; } jl_method_t; // This type is a placeholder to cache data for a specType signature specialization of a Method diff --git a/src/method.c b/src/method.c index a659b9aa44d56..c5e595fa75719 100644 --- a/src/method.c +++ b/src/method.c @@ -805,10 +805,10 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module) m->deleted_world = ~(size_t)0; m->is_for_opaque_closure = 0; m->constprop = 0; - JL_MUTEX_INIT(&m->writelock); #ifdef JL_DISPATCH_LOG_BOXES m->num_dynamic_dispatches = 0; #endif + JL_MUTEX_INIT(&m->writelock); return m; } From 3810fff4ae548ebf3ed4f459237c8607cd8dfd60 Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Thu, 16 Nov 2023 14:12:32 -0500 Subject: [PATCH 21/26] Cosmetic --- src/gc-alloc-profiler.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp index 060f58a97046d..f0c0dde410066 100644 --- a/src/gc-alloc-profiler.cpp +++ b/src/gc-alloc-profiler.cpp @@ -97,14 +97,14 @@ JL_DLLEXPORT uint64_t jl_num_boxed_inputs() { return num_boxed_inputs; } -JL_DLLEXPORT uint64_t jl_extra_num_boxed_inputs() -{ - return extra_num_boxed_inputs; -} JL_DLLEXPORT uint64_t jl_boxed_inputs_size() { return boxed_inputs_size; } +JL_DLLEXPORT uint64_t jl_extra_num_boxed_inputs() +{ + return extra_num_boxed_inputs; +} JL_DLLEXPORT uint64_t jl_extra_boxed_inputs_size() { return extra_boxed_inputs_size; From ebdc9730520fdb010758569694ded8e5b3518207 Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Thu, 16 Nov 2023 17:46:02 -0500 Subject: [PATCH 22/26] Cannot include options.h in julia.h It breaks PackageCompiler in RAICode. Add `num_dynamic_dispatches` to `jl_method_t` unconditionally instead. --- src/julia.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/julia.h b/src/julia.h index 136cb5a968ba9..8db3b7225e24a 100644 --- a/src/julia.h +++ b/src/julia.h @@ -17,7 +17,6 @@ #include #include -#include "options.h" #include "htable.h" #include "arraylist.h" #include "analyzer_annotations.h" @@ -349,9 +348,7 @@ typedef struct _jl_method_t { // forcing the conclusion to always true. _jl_purity_overrides_t purity; -#ifdef JL_DISPATCH_LOG_BOXES uint32_t num_dynamic_dispatches; -#endif // hidden fields: // lock for modifications to the method From d50bfd233a18151259c2856023060cd0b792e966 Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Sat, 18 Nov 2023 18:02:24 -0500 Subject: [PATCH 23/26] Improve accuracy --- src/cgutils.cpp | 74 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 61 insertions(+), 13 deletions(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 6fd7553e9556a..c73b83a38d0ff 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -43,6 +43,22 @@ STATISTIC(EmittedWriteBarriers, "Number of write barriers emitted"); STATISTIC(EmittedNewStructs, "Number of new structs emitted"); STATISTIC(EmittedDeferSignal, "Number of deferred signals emitted"); +#ifdef JL_DISPATCH_LOG_BOXES +static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p); +void logbox(jl_codectx_t &ctx, const jl_cgval_t &vinfo, jl_count_box_type log_reason) +{ + if (log_reason != JL_DONT_LOG_BOX) { + Function *F; + if (log_reason == JL_COUNT_BOX_INPUTS) { + F = prepare_call(jllogboxinput_func); + } else { + F = prepare_call(jllogboxreturn_func); + } + ctx.builder.CreateCall(F, emit_sizeof(ctx, vinfo)); + } +} +#endif + static Value *track_pjlvalue(jl_codectx_t &ctx, Value *V) { assert(V->getType() == ctx.types().T_pjlvalue); @@ -3032,9 +3048,18 @@ static Value *load_i8box(jl_codectx_t &ctx, Value *v, jl_datatype_t *ty) (jl_value_t*)ty)); } +#ifdef JL_DISPATCH_LOG_BOXES +static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t, jl_count_box_type log_reason=JL_DONT_LOG_BOX); +#else +static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t); +#endif // some types have special boxing functions with small-value caches // Returns ctx.types().T_prjlvalue +#ifdef JL_DISPATCH_LOG_BOXES +static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t, jl_count_box_type log_reason) +#else static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t) +#endif { jl_value_t *jt = vinfo.typ; if (jt == (jl_value_t*)jl_bool_type) @@ -3090,6 +3115,11 @@ static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t assert(jb->instance != NULL); return track_pjlvalue(ctx, literal_pointer_val(ctx, jb->instance)); } +#ifdef JL_DISPATCH_LOG_BOXES + if (log_reason != JL_DONT_LOG_BOX && box && jb != jl_int8_type) { + logbox(ctx, vinfo, log_reason); + } +#endif return box; } @@ -3163,6 +3193,11 @@ static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, return NULL; } +#ifdef JL_DISPATCH_LOG_BOXES +static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallBitVector &skip, jl_count_box_type log_reason=JL_DONT_LOG_BOX); +#else +static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallBitVector &skip); +#endif /* * Box unboxed values in a union. Optionally, skip certain unboxed values, * returning `Constant::getNullValue(ctx.types().T_pjlvalue)` in one of the skipped cases. If `skip` is not empty, @@ -3171,7 +3206,11 @@ static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, * `vinfo` is already an unknown boxed union (union tag 0x80). */ // Returns ctx.types().T_prjlvalue +#ifdef JL_DISPATCH_LOG_BOXES +static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallBitVector &skip, jl_count_box_type log_reason) +#else static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallBitVector &skip) +#endif { // given vinfo::Union{T, S}, emit IR of the form: // ... @@ -3209,10 +3248,19 @@ static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB } else { jl_cgval_t vinfo_r = jl_cgval_t(vinfo, (jl_value_t*)jt, NULL); +#ifdef JL_DISPATCH_LOG_BOXES + box = _boxed_special(ctx, vinfo_r, t, log_reason); +#else box = _boxed_special(ctx, vinfo_r, t); +#endif if (!box) { box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt)); init_bits_cgval(ctx, box, vinfo_r, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut); +#ifdef JL_DISPATCH_LOG_BOXES + if (log_reason != JL_DONT_LOG_BOX) { + logbox(ctx, vinfo_r, log_reason); + } +#endif } } tempBB = ctx.builder.GetInsertBlock(); // could have changed @@ -3327,30 +3375,25 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab return vinfo.V; } -#ifdef JL_DISPATCH_LOG_BOXES - // TODO(kp): sometimes !jl_is_datatype(jt) -- why? - if (log_reason != JL_DONT_LOG_BOX) { - Function *F; - if (log_reason == JL_COUNT_BOX_INPUTS) { - F = prepare_call(jllogboxinput_func); - } else { - F = prepare_call(jllogboxreturn_func); - } - ctx.builder.CreateCall(F, emit_sizeof(ctx, vinfo)); - } -#endif - Value *box; if (vinfo.TIndex) { SmallBitVector skip_none; +#ifdef JL_DISPATCH_LOG_BOXES + box = box_union(ctx, vinfo, skip_none, log_reason); +#else box = box_union(ctx, vinfo, skip_none); +#endif } else { assert(vinfo.V && "Missing data for unboxed value."); assert(jl_is_concrete_immutable(jt) && "This type shouldn't have been unboxed."); Type *t = julia_type_to_llvm(ctx, jt); assert(!type_is_ghost(t)); // ghost values should have been handled by vinfo.constant above! +#ifdef JL_DISPATCH_LOG_BOXES + box = _boxed_special(ctx, vinfo, t, log_reason); +#else box = _boxed_special(ctx, vinfo, t); +#endif if (!box) { bool do_promote = vinfo.promotion_point; if (do_promote && is_promotable) { @@ -3373,6 +3416,11 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt)); init_bits_cgval(ctx, box, vinfo, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut); } +#ifdef JL_DISPATCH_LOG_BOXES + if (log_reason != JL_DONT_LOG_BOX) { + logbox(ctx, vinfo, log_reason); + } +#endif } } return box; From 0999d363c3c8dd5fcf28563846e0d738b7aac6aa Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Thu, 23 Nov 2023 18:05:42 -0500 Subject: [PATCH 24/26] Add per-method record of time spent in generic lookup --- src/gf.c | 14 ++++++++++++++ src/jltypes.c | 10 ++++++---- src/julia.h | 1 + src/method.c | 1 + 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/gf.c b/src/gf.c index 7f338b31ccb4a..ed4ac118c61ad 100644 --- a/src/gf.c +++ b/src/gf.c @@ -2933,14 +2933,19 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t JL_DLLEXPORT jl_value_t *jl_apply_generic(jl_value_t *F, jl_value_t **args, uint32_t nargs) { size_t world = jl_current_task->world_age; +#ifdef JL_DISPATCH_LOG_BOXES + uint64_t t0 = jl_hrtime(); +#endif jl_method_instance_t *mfunc = jl_lookup_generic_(F, args, nargs, jl_int32hash_fast(jl_return_address()), world); JL_GC_PROMISE_ROOTED(mfunc); + #ifdef JL_DISPATCH_LOG_BOXES jl_method_t *def = mfunc->def.method; if (jl_is_method(def)) { def->num_dynamic_dispatches++; + def->dynamic_dispatch_ns += (jl_hrtime() - t0); } #endif return _jl_invoke(F, args, nargs, mfunc, world); @@ -2951,12 +2956,21 @@ JL_DLLEXPORT uint64_t jl_get_num_dynamic_dispatches(jl_method_t *m) { return m->num_dynamic_dispatches; } +JL_DLLEXPORT uint64_t jl_get_dynamic_dispatch_ns(jl_method_t *m) +{ + return m->dynamic_dispatch_ns; +} #else JL_DLLEXPORT uint64_t jl_get_num_dynamic_dispatches(jl_method_t *m) { jl_error("not logging"); return 0; } +JL_DLLEXPORT uint64_t jl_get_dynamic_dispatch_ns(jl_method_t *m) +{ + jl_error("not logging"); + return 0; +} #endif static jl_method_match_t *_gf_invoke_lookup(jl_value_t *types JL_PROPAGATES_ROOT, jl_value_t *mt, size_t world, size_t *min_valid, size_t *max_valid) diff --git a/src/jltypes.c b/src/jltypes.c index e7c1fd9d9c9c2..9bbaba1882242 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -2600,7 +2600,7 @@ void jl_init_types(void) JL_GC_DISABLED jl_method_type = jl_new_datatype(jl_symbol("Method"), core, jl_any_type, jl_emptysvec, - jl_perm_symsvec(30, + jl_perm_symsvec(31, "name", "module", "file", @@ -2630,8 +2630,9 @@ void jl_init_types(void) JL_GC_DISABLED "is_for_opaque_closure", "constprop", "purity", - "num_dynamic_dispatches"), - jl_svec(30, + "num_dynamic_dispatches", + "dynamic_dispatch_ns"), + jl_svec(31, jl_symbol_type, jl_module_type, jl_symbol_type, @@ -2661,7 +2662,8 @@ void jl_init_types(void) JL_GC_DISABLED jl_bool_type, jl_uint8_type, jl_uint8_type, - jl_int32_type), + jl_int32_type, + jl_uint64_type), jl_emptysvec, 0, 1, 10); //const static uint32_t method_constfields[1] = { 0x03fc065f }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<6)|(1<<9)|(1<<10)|(1<<18)|(1<<19)|(1<<20)|(1<<21)|(1<<22)|(1<<23)|(1<<24)|(1<<25); diff --git a/src/julia.h b/src/julia.h index 8db3b7225e24a..0fb6113402ffe 100644 --- a/src/julia.h +++ b/src/julia.h @@ -349,6 +349,7 @@ typedef struct _jl_method_t { _jl_purity_overrides_t purity; uint32_t num_dynamic_dispatches; + uint64_t dynamic_dispatch_ns; // hidden fields: // lock for modifications to the method diff --git a/src/method.c b/src/method.c index c5e595fa75719..e70b0a159f179 100644 --- a/src/method.c +++ b/src/method.c @@ -807,6 +807,7 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module) m->constprop = 0; #ifdef JL_DISPATCH_LOG_BOXES m->num_dynamic_dispatches = 0; + m->dynamic_dispatch_ns = 0; #endif JL_MUTEX_INIT(&m->writelock); return m; From bbecd31a260fa9c37ddb4e239f7a088b7c418bb3 Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Sun, 26 Nov 2023 11:03:12 -0500 Subject: [PATCH 25/26] Fix added fields' types --- src/gf.c | 8 ++++---- src/jltypes.c | 2 +- src/julia.h | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/gf.c b/src/gf.c index ed4ac118c61ad..29edf63429bd9 100644 --- a/src/gf.c +++ b/src/gf.c @@ -2952,21 +2952,21 @@ JL_DLLEXPORT jl_value_t *jl_apply_generic(jl_value_t *F, jl_value_t **args, uint } #ifdef JL_DISPATCH_LOG_BOXES -JL_DLLEXPORT uint64_t jl_get_num_dynamic_dispatches(jl_method_t *m) +JL_DLLEXPORT int32_t jl_get_num_dynamic_dispatches(jl_method_t *m) { return m->num_dynamic_dispatches; } -JL_DLLEXPORT uint64_t jl_get_dynamic_dispatch_ns(jl_method_t *m) +JL_DLLEXPORT int64_t jl_get_dynamic_dispatch_ns(jl_method_t *m) { return m->dynamic_dispatch_ns; } #else -JL_DLLEXPORT uint64_t jl_get_num_dynamic_dispatches(jl_method_t *m) +JL_DLLEXPORT int32_t jl_get_num_dynamic_dispatches(jl_method_t *m) { jl_error("not logging"); return 0; } -JL_DLLEXPORT uint64_t jl_get_dynamic_dispatch_ns(jl_method_t *m) +JL_DLLEXPORT int64_t jl_get_dynamic_dispatch_ns(jl_method_t *m) { jl_error("not logging"); return 0; diff --git a/src/jltypes.c b/src/jltypes.c index 9bbaba1882242..fc651befca365 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -2663,7 +2663,7 @@ void jl_init_types(void) JL_GC_DISABLED jl_uint8_type, jl_uint8_type, jl_int32_type, - jl_uint64_type), + jl_int64_type), jl_emptysvec, 0, 1, 10); //const static uint32_t method_constfields[1] = { 0x03fc065f }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<6)|(1<<9)|(1<<10)|(1<<18)|(1<<19)|(1<<20)|(1<<21)|(1<<22)|(1<<23)|(1<<24)|(1<<25); diff --git a/src/julia.h b/src/julia.h index 0fb6113402ffe..2da6c78941616 100644 --- a/src/julia.h +++ b/src/julia.h @@ -348,8 +348,8 @@ typedef struct _jl_method_t { // forcing the conclusion to always true. _jl_purity_overrides_t purity; - uint32_t num_dynamic_dispatches; - uint64_t dynamic_dispatch_ns; + int32_t num_dynamic_dispatches; + int64_t dynamic_dispatch_ns; // hidden fields: // lock for modifications to the method From be490c96407cda6f7ac7ff7c8c98c66cc4b9b011 Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Sun, 26 Nov 2023 18:58:47 -0500 Subject: [PATCH 26/26] Add defensive check --- src/gf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gf.c b/src/gf.c index 29edf63429bd9..9a5a720f3afbc 100644 --- a/src/gf.c +++ b/src/gf.c @@ -2954,11 +2954,11 @@ JL_DLLEXPORT jl_value_t *jl_apply_generic(jl_value_t *F, jl_value_t **args, uint #ifdef JL_DISPATCH_LOG_BOXES JL_DLLEXPORT int32_t jl_get_num_dynamic_dispatches(jl_method_t *m) { - return m->num_dynamic_dispatches; + return (m && jl_is_method(m)) ? m->num_dynamic_dispatches : 0; } JL_DLLEXPORT int64_t jl_get_dynamic_dispatch_ns(jl_method_t *m) { - return m->dynamic_dispatch_ns; + return (m && jl_is_method(m)) ? m->dynamic_dispatch_ns : 0; } #else JL_DLLEXPORT int32_t jl_get_num_dynamic_dispatches(jl_method_t *m)