diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 8ce84acb30901..c73b83a38d0ff 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -43,6 +43,22 @@ STATISTIC(EmittedWriteBarriers, "Number of write barriers emitted"); STATISTIC(EmittedNewStructs, "Number of new structs emitted"); STATISTIC(EmittedDeferSignal, "Number of deferred signals emitted"); +#ifdef JL_DISPATCH_LOG_BOXES +static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p); +void logbox(jl_codectx_t &ctx, const jl_cgval_t &vinfo, jl_count_box_type log_reason) +{ + if (log_reason != JL_DONT_LOG_BOX) { + Function *F; + if (log_reason == JL_COUNT_BOX_INPUTS) { + F = prepare_call(jllogboxinput_func); + } else { + F = prepare_call(jllogboxreturn_func); + } + ctx.builder.CreateCall(F, emit_sizeof(ctx, vinfo)); + } +} +#endif + static Value *track_pjlvalue(jl_codectx_t &ctx, Value *V) { assert(V->getType() == ctx.types().T_pjlvalue); @@ -999,8 +1015,11 @@ static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDN return load; } +#ifdef JL_DISPATCH_LOG_BOXES +static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v, bool is_promotable=false, jl_count_box_type log_reason=JL_DONT_LOG_BOX); +#else static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v, bool is_promotable=false); - +#endif static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull); static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull) @@ -1107,7 +1126,6 @@ static Value *emit_datatype_size(jl_codectx_t &ctx, Value *dt) return ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t)))); } -/* this is valid code, it's simply unused static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p) { if (p.TIndex) { @@ -1130,7 +1148,7 @@ static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p) ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); ctx.builder.CreateCondBr(isboxed, dynloadBB, postBB); 
ctx.builder.SetInsertPoint(dynloadBB); - Value *datatype = emit_typeof(p.V); + Value *datatype = emit_typeof(ctx, p.V, false); Value *dyn_size = emit_datatype_size(ctx, datatype); ctx.builder.CreateBr(postBB); dynloadBB = ctx.builder.GetInsertBlock(); // could have changed @@ -1156,6 +1174,7 @@ static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p) } } +/* this is valid code, it's simply unused static Value *emit_datatype_mutabl(jl_codectx_t &ctx, Value *dt) { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); @@ -3029,9 +3048,18 @@ static Value *load_i8box(jl_codectx_t &ctx, Value *v, jl_datatype_t *ty) (jl_value_t*)ty)); } +#ifdef JL_DISPATCH_LOG_BOXES +static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t, jl_count_box_type log_reason=JL_DONT_LOG_BOX); +#else +static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t); +#endif // some types have special boxing functions with small-value caches // Returns ctx.types().T_prjlvalue +#ifdef JL_DISPATCH_LOG_BOXES +static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t, jl_count_box_type log_reason) +#else static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t) +#endif { jl_value_t *jt = vinfo.typ; if (jt == (jl_value_t*)jl_bool_type) @@ -3087,6 +3115,11 @@ static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t assert(jb->instance != NULL); return track_pjlvalue(ctx, literal_pointer_val(ctx, jb->instance)); } +#ifdef JL_DISPATCH_LOG_BOXES + if (log_reason != JL_DONT_LOG_BOX && box && jb != jl_int8_type && jb != jl_uint8_type) { // NOTE(review): Int8 and UInt8 presumably both box via the load_i8box cache (no allocation to count) - confirm + logbox(ctx, vinfo, log_reason); + } +#endif return box; } @@ -3160,6 +3193,11 @@ static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, return NULL; } +#ifdef JL_DISPATCH_LOG_BOXES +static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallBitVector &skip, jl_count_box_type log_reason=JL_DONT_LOG_BOX); +#else 
+static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallBitVector &skip); +#endif /* * Box unboxed values in a union. Optionally, skip certain unboxed values, * returning `Constant::getNullValue(ctx.types().T_pjlvalue)` in one of the skipped cases. If `skip` is not empty, @@ -3168,7 +3206,11 @@ static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, * `vinfo` is already an unknown boxed union (union tag 0x80). */ // Returns ctx.types().T_prjlvalue +#ifdef JL_DISPATCH_LOG_BOXES +static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallBitVector &skip, jl_count_box_type log_reason) +#else static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallBitVector &skip) +#endif { // given vinfo::Union{T, S}, emit IR of the form: // ... @@ -3206,10 +3248,19 @@ static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB } else { jl_cgval_t vinfo_r = jl_cgval_t(vinfo, (jl_value_t*)jt, NULL); +#ifdef JL_DISPATCH_LOG_BOXES + box = _boxed_special(ctx, vinfo_r, t, log_reason); +#else box = _boxed_special(ctx, vinfo_r, t); +#endif if (!box) { box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt)); init_bits_cgval(ctx, box, vinfo_r, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut); +#ifdef JL_DISPATCH_LOG_BOXES + if (log_reason != JL_DONT_LOG_BOX) { + logbox(ctx, vinfo_r, log_reason); + } +#endif } } tempBB = ctx.builder.GetInsertBlock(); // could have changed @@ -3303,7 +3354,11 @@ static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsig // dynamically-typed value is required (e.g. argument to unknown function). // if it's already a pointer it's left alone. 
// Returns ctx.types().T_prjlvalue +#ifdef JL_DISPATCH_LOG_BOXES +static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotable, jl_count_box_type log_reason) +#else static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotable) +#endif { jl_value_t *jt = vinfo.typ; if (jt == jl_bottom_type || jt == NULL) @@ -3323,14 +3378,22 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab Value *box; if (vinfo.TIndex) { SmallBitVector skip_none; +#ifdef JL_DISPATCH_LOG_BOXES + box = box_union(ctx, vinfo, skip_none, log_reason); +#else box = box_union(ctx, vinfo, skip_none); +#endif } else { assert(vinfo.V && "Missing data for unboxed value."); assert(jl_is_concrete_immutable(jt) && "This type shouldn't have been unboxed."); Type *t = julia_type_to_llvm(ctx, jt); assert(!type_is_ghost(t)); // ghost values should have been handled by vinfo.constant above! +#ifdef JL_DISPATCH_LOG_BOXES + box = _boxed_special(ctx, vinfo, t, log_reason); +#else box = _boxed_special(ctx, vinfo, t); +#endif if (!box) { bool do_promote = vinfo.promotion_point; if (do_promote && is_promotable) { @@ -3353,6 +3416,11 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt)); init_bits_cgval(ctx, box, vinfo, jl_is_mutable(jt) ? 
ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut); } +#ifdef JL_DISPATCH_LOG_BOXES + if (log_reason != JL_DONT_LOG_BOX) { + logbox(ctx, vinfo, log_reason); + } +#endif } } return box; diff --git a/src/codegen.cpp b/src/codegen.cpp index a4773acb3fbea..53bcded3583c5 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -889,6 +889,22 @@ static const auto jlegalx_func = new JuliaFunction{ AttributeSet(), None); }, }; +#ifdef JL_DISPATCH_LOG_BOXES +static const auto jllogboxinput_func = new JuliaFunction{ + XSTR(jl_log_box_input), + [](LLVMContext &C) { + return FunctionType::get(getVoidTy(C), {getInt32Ty(C)}, false); + }, + nullptr, +}; +static const auto jllogboxreturn_func = new JuliaFunction{ + XSTR(jl_log_box_return), + [](LLVMContext &C) { + return FunctionType::get(getVoidTy(C), {getInt32Ty(C)}, false); + }, + nullptr, +}; +#endif static const auto jl_alloc_obj_func = new JuliaFunction{ "julia.gc_alloc_obj", [](LLVMContext &C) { @@ -4072,7 +4088,12 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *t if (theF) theArgs.push_back(theF); for (size_t i = 0; i < nargs; i++) { +#ifdef JL_DISPATCH_LOG_BOXES + // log the boxed arguments for this call + Value *arg = boxed(ctx, argv[i], false, JL_COUNT_BOX_INPUTS); +#else Value *arg = boxed(ctx, argv[i]); +#endif theArgs.push_back(arg); } CallInst *result = ctx.builder.CreateCall(TheTrampoline, theArgs); @@ -6723,7 +6744,12 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret break; } } +#ifdef JL_DISPATCH_LOG_BOXES + // log the box for this return value + ctx.builder.CreateRet(boxed(ctx, retval, false, JL_COUNT_BOX_RETURNS)); +#else ctx.builder.CreateRet(boxed(ctx, retval)); +#endif return w; } @@ -8224,7 +8250,12 @@ static jl_llvm_functions_t else if (VN->getType() == ctx.types().T_prjlvalue) { // Includes the jl_is_uniontype(phiType) && !TindexN case // TODO: if convert_julia_type says it is wasted effort and to skip it, is it worth using 
Constant::getNullValue(ctx.types().T_prjlvalue) (dynamically)? +#ifdef JL_DISPATCH_LOG_BOXES + // NOTE(PR): This one is the boxing for hash. + V = boxed(ctx, val, false, JL_COUNT_BOX_INPUTS); +#else V = boxed(ctx, val); +#endif } else { // must be careful to emit undef here (rather than a bitcast or diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp index 1bcbeb2189f5f..f0c0dde410066 100644 --- a/src/gc-alloc-profiler.cpp +++ b/src/gc-alloc-profiler.cpp @@ -78,6 +78,90 @@ jl_raw_backtrace_t get_raw_backtrace() JL_NOTSAFEPOINT { extern "C" { // Needed since these functions doesn't take any arguments. +uint64_t num_boxed_inputs; +uint64_t boxed_inputs_size; +uint64_t extra_num_boxed_inputs; +uint64_t extra_boxed_inputs_size; +uint64_t num_boxed_returns; +uint64_t boxed_returns_size; + +JL_DLLEXPORT uint64_t jl_total_boxes() +{ + return num_boxed_inputs + extra_num_boxed_inputs + num_boxed_returns; +} +JL_DLLEXPORT uint64_t jl_total_boxes_size() +{ + return boxed_inputs_size + extra_boxed_inputs_size + boxed_returns_size; +} +JL_DLLEXPORT uint64_t jl_num_boxed_inputs() +{ + return num_boxed_inputs; +} +JL_DLLEXPORT uint64_t jl_boxed_inputs_size() +{ + return boxed_inputs_size; +} +JL_DLLEXPORT uint64_t jl_extra_num_boxed_inputs() +{ + return extra_num_boxed_inputs; +} +JL_DLLEXPORT uint64_t jl_extra_boxed_inputs_size() +{ + return extra_boxed_inputs_size; +} +JL_DLLEXPORT uint64_t jl_num_boxed_returns() +{ + return num_boxed_returns; +} +JL_DLLEXPORT uint64_t jl_boxed_returns_size() +{ + return boxed_returns_size; +} + +static float extra_allocs_rate = 0.0f; +JL_DLLEXPORT void jl_set_extra_allocs_rate(float rate) +{ + extra_allocs_rate = rate; +} + +#ifdef JL_DISPATCH_LOG_BOXES +JL_DLLEXPORT void jl_log_box_input(int32_t sz) +{ + num_boxed_inputs++; + boxed_inputs_size += sz; + + // Randomly, with a probability of `extra_allocs_rate`, record some number of + // extra allocations. 
The goal is to estimate the impact of _reducing_ the + // number of allocations for boxing. The integer part of the rate is always + // recorded; the fractional remainder is the probability of recording one more, + // so on average about extra_allocs_rate extra objects are allocated per logged box. + if (extra_allocs_rate > 0.0f) { + float num_extra_allocs = extra_allocs_rate; + jl_value_t *extra_obj; + while (num_extra_allocs >= 1) { + num_extra_allocs--; + extra_num_boxed_inputs++; + extra_boxed_inputs_size += sz; + extra_obj = jl_gc_allocobj(sz); + memset(extra_obj, 0, sz); + } + // allocate one more with probability equal to the fractional remainder + float sample = float(rand()) / float(RAND_MAX); + if (sample < num_extra_allocs) { + extra_num_boxed_inputs++; + extra_boxed_inputs_size += sz; + extra_obj = jl_gc_allocobj(sz); + memset(extra_obj, 0, sz); + } + } +} +JL_DLLEXPORT void jl_log_box_return(int32_t sz) +{ + num_boxed_returns++; + boxed_returns_size += sz; +} +#endif + JL_DLLEXPORT void jl_start_alloc_profile(double sample_rate) { // We only need to do this once, the first time this is called. size_t nthreads = jl_atomic_load_acquire(&jl_n_threads); diff --git a/src/gc-alloc-profiler.h b/src/gc-alloc-profiler.h index fcd8e45caa2d8..e6c147913fa1d 100644 --- a/src/gc-alloc-profiler.h +++ b/src/gc-alloc-profiler.h @@ -17,6 +17,14 @@ extern "C" { // Forward-declaration to avoid dependency in header file. 
struct jl_raw_alloc_t; // Defined in gc-alloc-profiler.cpp +#ifdef JL_DISPATCH_LOG_BOXES +enum jl_count_box_type { + JL_DONT_LOG_BOX, + JL_COUNT_BOX_INPUTS, + JL_COUNT_BOX_RETURNS, +}; +#endif + typedef struct { struct jl_raw_alloc_t *allocs; size_t num_allocs; diff --git a/src/gf.c b/src/gf.c index 33849dd5aa387..9a5a720f3afbc 100644 --- a/src/gf.c +++ b/src/gf.c @@ -2933,13 +2933,46 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t JL_DLLEXPORT jl_value_t *jl_apply_generic(jl_value_t *F, jl_value_t **args, uint32_t nargs) { size_t world = jl_current_task->world_age; +#ifdef JL_DISPATCH_LOG_BOXES + uint64_t t0 = jl_hrtime(); +#endif jl_method_instance_t *mfunc = jl_lookup_generic_(F, args, nargs, jl_int32hash_fast(jl_return_address()), world); JL_GC_PROMISE_ROOTED(mfunc); + +#ifdef JL_DISPATCH_LOG_BOXES + jl_method_t *def = mfunc->def.method; + if (jl_is_method(def)) { + def->num_dynamic_dispatches++; + def->dynamic_dispatch_ns += (jl_hrtime() - t0); + } +#endif return _jl_invoke(F, args, nargs, mfunc, world); } +#ifdef JL_DISPATCH_LOG_BOXES +JL_DLLEXPORT int32_t jl_get_num_dynamic_dispatches(jl_method_t *m) +{ + return (m && jl_is_method(m)) ? m->num_dynamic_dispatches : 0; +} +JL_DLLEXPORT int64_t jl_get_dynamic_dispatch_ns(jl_method_t *m) +{ + return (m && jl_is_method(m)) ? 
m->dynamic_dispatch_ns : 0; +} +#else +JL_DLLEXPORT int32_t jl_get_num_dynamic_dispatches(jl_method_t *m) +{ + jl_error("not logging"); + return 0; +} +JL_DLLEXPORT int64_t jl_get_dynamic_dispatch_ns(jl_method_t *m) +{ + jl_error("not logging"); + return 0; +} +#endif + static jl_method_match_t *_gf_invoke_lookup(jl_value_t *types JL_PROPAGATES_ROOT, jl_value_t *mt, size_t world, size_t *min_valid, size_t *max_valid) { jl_value_t *unw = jl_unwrap_unionall((jl_value_t*)types); diff --git a/src/jltypes.c b/src/jltypes.c index 0767e9493bbc0..fc651befca365 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -2600,7 +2600,7 @@ void jl_init_types(void) JL_GC_DISABLED jl_method_type = jl_new_datatype(jl_symbol("Method"), core, jl_any_type, jl_emptysvec, - jl_perm_symsvec(29, + jl_perm_symsvec(31, "name", "module", "file", @@ -2629,8 +2629,10 @@ void jl_init_types(void) JL_GC_DISABLED "pure", "is_for_opaque_closure", "constprop", - "purity"), - jl_svec(29, + "purity", + "num_dynamic_dispatches", + "dynamic_dispatch_ns"), + jl_svec(31, jl_symbol_type, jl_module_type, jl_symbol_type, @@ -2659,7 +2661,9 @@ void jl_init_types(void) JL_GC_DISABLED jl_bool_type, jl_bool_type, jl_uint8_type, - jl_uint8_type), + jl_uint8_type, + jl_int32_type, + jl_int64_type), jl_emptysvec, 0, 1, 10); //const static uint32_t method_constfields[1] = { 0x03fc065f }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<6)|(1<<9)|(1<<10)|(1<<18)|(1<<19)|(1<<20)|(1<<21)|(1<<22)|(1<<23)|(1<<24)|(1<<25); diff --git a/src/julia.h b/src/julia.h index a34c1f06d0cc1..2da6c78941616 100644 --- a/src/julia.h +++ b/src/julia.h @@ -348,6 +348,9 @@ typedef struct _jl_method_t { // forcing the conclusion to always true. 
_jl_purity_overrides_t purity; + int32_t num_dynamic_dispatches; + int64_t dynamic_dispatch_ns; + // hidden fields: // lock for modifications to the method jl_mutex_t writelock; diff --git a/src/method.c b/src/method.c index 4e48ef9122d79..e70b0a159f179 100644 --- a/src/method.c +++ b/src/method.c @@ -805,6 +805,9 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module) m->deleted_world = ~(size_t)0; m->is_for_opaque_closure = 0; m->constprop = 0; + // zero the dispatch counters unconditionally: the fields are part of jl_method_t even when JL_DISPATCH_LOG_BOXES is off + m->num_dynamic_dispatches = 0; + m->dynamic_dispatch_ns = 0; JL_MUTEX_INIT(&m->writelock); return m; } diff --git a/src/options.h b/src/options.h index 06af3e33fcbdc..ec75e11b15307 100644 --- a/src/options.h +++ b/src/options.h @@ -98,6 +98,9 @@ // profile generic (not inlined or specialized) calls to each function //#define JL_GF_PROFILE +// count boxes and box sizes for generic calls, and count/time dynamic dispatches per method +#define JL_DISPATCH_LOG_BOXES + // task options ---------------------------------------------------------------