Split up the one big codegen lock into per-function locks and dependency edge tracking #56179
Changes from all commits: ddd8439, f0ab810, 8612b9d, d4956ef, 557f16f, 9885eaf
@@ -295,12 +295,12 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_
    jl_value_t *ci = cgparams.lookup(mi, world, world);
    JL_GC_PROMISE_ROOTED(ci);
    jl_code_instance_t *codeinst = NULL;
    JL_GC_PUSH1(&codeinst);
    if (ci != jl_nothing && jl_atomic_load_relaxed(&((jl_code_instance_t *)ci)->inferred) != jl_nothing) {
        codeinst = (jl_code_instance_t*)ci;
    }
    else {
        if (cgparams.lookup != jl_rettype_inferred_addr) {
            // XXX: This will corrupt and leak a lot of memory which may be very bad
            jl_error("Refusing to automatically run type inference with custom cache lookup.");
        }
        else {
|
|
@@ -309,15 +309,129 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_
             * it into the cache here, since it was explicitly requested and is
             * otherwise not reachable from anywhere in the system image.
             */
            if (!jl_mi_cache_has_ci(mi, codeinst))
            if (codeinst && !jl_mi_cache_has_ci(mi, codeinst)) {
                JL_GC_PUSH1(&codeinst);
                jl_mi_cache_insert(mi, codeinst);
                JL_GC_POP();
            }
        }
    }
    JL_GC_POP();
    return codeinst;
}
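An aside for readers unfamiliar with the rooting macros this hunk moves around: JL_GC_PUSH1 registers a stack slot with the GC so whatever is stored in it stays alive across allocating calls, and JL_GC_POP removes that registration. A minimal sketch of the idiom, with a hypothetical allocating helper standing in for the lookup/inference calls above:

    // Sketch of the JL_GC_PUSH1/JL_GC_POP rooting idiom. do_allocating_lookup
    // is a hypothetical placeholder, not a function from this PR.
    static void example_rooting(jl_method_instance_t *mi)
    {
        jl_code_instance_t *codeinst = NULL;
        JL_GC_PUSH1(&codeinst);            // root the local so the GC can see it
        codeinst = do_allocating_lookup(mi);
        jl_mi_cache_insert(mi, codeinst);  // codeinst stays rooted across this call
        JL_GC_POP();                       // must pair with the PUSH in the same scope
    }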
|
|
arraylist_t new_invokes;
typedef DenseMap<jl_code_instance_t*, std::pair<orc::ThreadSafeModule, jl_llvm_functions_t>> jl_compiled_functions_t;
static void compile_workqueue(jl_codegen_params_t &params, CompilationPolicy policy, jl_compiled_functions_t &compiled_functions)
{
    decltype(params.workqueue) workqueue;
    std::swap(params.workqueue, workqueue);
    jl_code_info_t *src = NULL;
    jl_code_instance_t *codeinst = NULL;
    JL_GC_PUSH2(&src, &codeinst);
    assert(!params.cache);
    while (!workqueue.empty()) {
        auto it = workqueue.pop_back_val();
        codeinst = it.first;
        auto &proto = it.second;
        // try to emit code for this item from the workqueue
        StringRef invokeName = "";
        StringRef preal_decl = "";
        bool preal_specsig = false;
        {
            auto it = compiled_functions.find(codeinst);
            if (it == compiled_functions.end()) {
                // Reinfer the function. The JIT came along and removed the inferred
                // method body. See #34993
                if ((policy != CompilationPolicy::Default || params.params->trim) &&
                    jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) {
                    // XXX: SOURCE_MODE_FORCE_SOURCE is wrong here (neither sufficient nor necessary)
|
Review comment: Any hint w.r.t. how this will be tightened up, or what the consequences are?
Reply: I don't know yet, but I think it will involve moving more of this function into inference.
                    codeinst = jl_type_infer(codeinst->def, jl_atomic_load_relaxed(&codeinst->max_world), SOURCE_MODE_FORCE_SOURCE);
                }
                if (codeinst) {
                    orc::ThreadSafeModule result_m =
                        jl_create_ts_module(name_from_method_instance(codeinst->def),
                                            params.tsctx, params.DL, params.TargetTriple);
                    auto decls = jl_emit_codeinst(result_m, codeinst, NULL, params);
                    if (result_m)
                        it = compiled_functions.insert(std::make_pair(codeinst, std::make_pair(std::move(result_m), std::move(decls)))).first;
                }
            }
            if (it != compiled_functions.end()) {
                auto &decls = it->second.second;
                invokeName = decls.functionObject;
                if (decls.functionObject == "jl_fptr_args") {
                    preal_decl = decls.specFunctionObject;
                }
                else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call") {
                    preal_decl = decls.specFunctionObject;
                    preal_specsig = true;
                }
            }
        }
        // patch up the prototype we emitted earlier
        Module *mod = proto.decl->getParent();
        assert(proto.decl->isDeclaration());
        Function *pinvoke = nullptr;
        if (preal_decl.empty()) {
            if (invokeName.empty() && params.params->trim) {
                errs() << "Bailed out to invoke when compiling:";
                jl_(codeinst->def);
                abort();
            }
            pinvoke = emit_tojlinvoke(codeinst, invokeName, mod, params);
            if (!proto.specsig)
                proto.decl->replaceAllUsesWith(pinvoke);
        }
        if (proto.specsig && !preal_specsig) {
            // get or build an fptr1 that can invoke codeinst
            if (pinvoke == nullptr)
                pinvoke = get_or_emit_fptr1(preal_decl, mod);
            // emit specsig-to-(jl)invoke conversion
            proto.decl->setLinkage(GlobalVariable::InternalLinkage);
            //protodecl->setAlwaysInline();
            jl_init_function(proto.decl, params.TargetTriple);
            jl_method_instance_t *mi = codeinst->def;
            size_t nrealargs = jl_nparams(mi->specTypes); // number of actual arguments being passed
            bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
            // TODO: maybe this can be cached in codeinst->specfptr?
            emit_specsig_to_fptr1(proto.decl, proto.cc, proto.return_roots, mi->specTypes, codeinst->rettype, is_opaque_closure, nrealargs, params, pinvoke, 0, 0);
            preal_decl = ""; // no need to fixup the name
        }
        if (!preal_decl.empty()) {
            // merge and/or rename this prototype to the real function
            if (Value *specfun = mod->getNamedValue(preal_decl)) {
                if (proto.decl != specfun)
                    proto.decl->replaceAllUsesWith(specfun);
            }
            else {
                proto.decl->setName(preal_decl);
            }
        }
        if (proto.oc) { // additionally, if we are dealing with an oc, then we might also need to fix up the fptr1 reference too
|
Review comment: Just for my personal edification, and to get a sense of some of the affected interface here, could you explain what the invalid part looked like here, in terms of code?
Reply: The ABI is expressed as the tuple (mi.specTypes => codeinst.rettype), but the old code tried to use the wrong value for rettype by reimplementing a buggier version of the workqueue here. With the workqueue gone, that was no longer feasible.
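A minimal illustration of the point in that reply, using a hypothetical type name (SpecsigABI is not a type in this PR): the specsig calling convention is pinned down by the pair of argument types and inferred return type, so both values have to come from the same code instance.

    // Hypothetical sketch, not code from this PR (types come from julia.h):
    // the pair that fixes the specsig ABI of a compiled code instance.
    // Substituting a guessed rettype, as the old workqueue reimplementation
    // effectively did, yields a mismatched calling convention.
    struct SpecsigABI {
        jl_value_t *specTypes; // mi->specTypes: tuple type of the actual arguments
        jl_value_t *rettype;   // codeinst->rettype: the inferred return type
    };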
            assert(proto.specsig);
            StringRef ocinvokeDecl = invokeName;
            // if OC expected a specialized specsig dispatch, but we don't have it, use the inner trampoline here too
            // XXX: this invoke translation logic is supposed to exactly match new_opaque_closure
            if (!preal_specsig || ocinvokeDecl == "jl_f_opaque_closure_call" || ocinvokeDecl == "jl_fptr_interpret_call" || ocinvokeDecl == "jl_fptr_const_return")
                ocinvokeDecl = pinvoke->getName();
            assert(!ocinvokeDecl.empty());
            assert(ocinvokeDecl != "jl_fptr_args");
            assert(ocinvokeDecl != "jl_fptr_sparam");
            // merge and/or rename this prototype to the real function
            if (Value *specfun = mod->getNamedValue(ocinvokeDecl)) {
                if (proto.oc != specfun)
                    proto.oc->replaceAllUsesWith(specfun);
            }
            else {
                proto.oc->setName(ocinvokeDecl);
            }
        }
        workqueue.append(params.workqueue);
        params.workqueue.clear();
    }
    JL_GC_POP();
}
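Note the last two statements of the loop body: emitting code for one prototype can push newly referenced callees onto params.workqueue, so the function folds them back into its local queue and keeps going until nothing new appears. A generic sketch of that drain-and-refill idiom (plain C++, not the PR's actual types), in case the shape of the loop is unfamiliar:

    #include <iterator>
    #include <utility>
    #include <vector>

    // Generic sketch of the drain-and-refill idiom used by compile_workqueue:
    // processing one item may enqueue more items onto the shared queue, so keep
    // looping until both the local and the shared queue are empty.
    template <typename Item, typename ProcessFn>
    void drain_workqueue(std::vector<Item> &shared, ProcessFn process)
    {
        std::vector<Item> local;
        std::swap(shared, local);      // take ownership of the pending items
        while (!local.empty()) {
            Item item = std::move(local.back());
            local.pop_back();
            process(item);             // may append new items to `shared`
            // fold any newly discovered work back into the local queue
            local.insert(local.end(),
                         std::make_move_iterator(shared.begin()),
                         std::make_move_iterator(shared.end()));
            shared.clear();
        }
    }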
|
|
// takes the running content that has collected in the shadow module and dump it to disk
// this builds the object file portion of the sysimage files for fast startup, and can
// also be used be extern consumers like GPUCompiler.jl to obtain a module containing
|
|
@@ -346,7 +460,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
    orc::ThreadSafeContext ctx;
    orc::ThreadSafeModule backing;
    if (!llvmmod) {
        ctx = jl_ExecutionEngine->acquireContext();
        ctx = jl_ExecutionEngine->makeContext();
        backing = jl_create_ts_module("text", ctx);
    }
    orc::ThreadSafeModule &clone = llvmmod ? *unwrap(llvmmod) : backing;
|
|
@@ -367,11 +481,11 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
    params.imaging_mode = imaging;
    params.debug_level = cgparams->debug_info_level;
    params.external_linkage = _external_linkage;
    arraylist_new(&new_invokes, 0);
    size_t compile_for[] = { jl_typeinf_world, _world };
    int worlds = 0;
    if (jl_options.trim != JL_TRIM_NO)
        worlds = 1;
    jl_compiled_functions_t compiled_functions;
    for (; worlds < 2; worlds++) {
        JL_TIMING(NATIVE_AOT, NATIVE_Codegen);
        size_t this_world = compile_for[worlds];
|
|
@@ -391,7 +505,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
                continue;
            }
            mi = (jl_method_instance_t*)item;
    compile_mi:
            src = NULL;
            // if this method is generally visible to the current compilation world,
            // and this is either the primary world, or not applicable in the primary world
|
|
@@ -406,7 +519,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
                jl_(mi);
                abort();
            }
            if (codeinst && !params.compiled_functions.count(codeinst) && !data->jl_fvar_map.count(codeinst)) {
            if (codeinst && !compiled_functions.count(codeinst) && !data->jl_fvar_map.count(codeinst)) {
                // now add it to our compilation results
                // Const returns do not do codegen, but juliac inspects codegen results so make a dummy fvar entry to represent it
                if (jl_options.trim != JL_TRIM_NO && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) {
|
|
@@ -418,7 +531,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
                        Triple(clone.getModuleUnlocked()->getTargetTriple()));
                    jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, NULL, params);
                    if (result_m)
                        params.compiled_functions[codeinst] = {std::move(result_m), std::move(decls)};
                        compiled_functions[codeinst] = {std::move(result_m), std::move(decls)};
                    else if (jl_options.trim != JL_TRIM_NO) {
                        // if we're building a small image, we need to compile everything
                        // to ensure that we have all the information we need.
|
|
@@ -428,26 +541,19 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
                    }
                }
            }
        } else if (this_world != jl_typeinf_world) {
        }
        else if (this_world != jl_typeinf_world) {
            /*
            jl_safe_printf("Codegen could not find requested codeinstance to be compiled\n");
            jl_(mi);
            abort();
            */
        }
        // TODO: is goto the best way to do this?
        jl_compile_workqueue(params, policy);
        mi = (jl_method_instance_t*)arraylist_pop(&new_invokes);
        if (mi != NULL) {
            goto compile_mi;
        }
    }
|
|
        // finally, make sure all referenced methods also get compiled or fixed up
        jl_compile_workqueue(params, policy);
    }
    JL_GC_POP();
    arraylist_free(&new_invokes);
    // finally, make sure all referenced methods also get compiled or fixed up
    compile_workqueue(params, policy, compiled_functions);
|
|
    // process the globals array, before jl_merge_module destroys them
    SmallVector<std::string, 0> gvars(params.global_targets.size());
|
|
@@ -464,7 +570,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
        data->jl_value_to_llvm[idx] = global.first;
        idx++;
    }
    CreateNativeMethods += params.compiled_functions.size();
    CreateNativeMethods += compiled_functions.size();
|
|
    size_t offset = gvars.size();
    data->jl_external_to_llvm.resize(params.external_fns.size());
|
|
@@ -489,7 +595,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
    {
        JL_TIMING(NATIVE_AOT, NATIVE_Merge);
        Linker L(*clone.getModuleUnlocked());
        for (auto &def : params.compiled_functions) {
        for (auto &def : compiled_functions) {
            jl_merge_module(clone, std::move(std::get<0>(def.second)));
            jl_code_instance_t *this_code = def.first;
            jl_llvm_functions_t decls = std::get<1>(def.second);
|
|
@@ -573,9 +679,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
        }
        ct->reentrant_timing &= ~1ull;
    }
    if (ctx.getContext()) {
        jl_ExecutionEngine->releaseContext(std::move(ctx));
    }
    return (void*)data;
}
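With the acquireContext/releaseContext pairing gone, each caller now obtains a fresh context from makeContext and simply lets it be destroyed when the last reference drops, so there is no pool to hand it back to. A rough sketch of what a makeContext-style helper amounts to in ORC terms; this is an assumption about its shape, not the PR's actual implementation:

    #include <memory>
    #include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h" // declares ThreadSafeContext
    #include "llvm/IR/LLVMContext.h"

    // Assumed shape of a makeContext()-style helper: wrap a brand-new LLVMContext
    // in an orc::ThreadSafeContext. The underlying context is shared through
    // ThreadSafeContext copies, so no explicit release call is required.
    llvm::orc::ThreadSafeContext makeFreshContext()
    {
        return llvm::orc::ThreadSafeContext(std::make_unique<llvm::LLVMContext>());
    }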
|
|
|
|
@@ -1975,11 +2078,6 @@ void jl_dump_native_impl(void *native_code,
        }
    }
|
|
void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis)
{
    PM->add(new TargetLibraryInfoWrapperPass(triple));
    PM->add(createTargetTransformInfoWrapperPass(std::move(analysis)));
}
|
|
// sometimes in GDB you want to find out what code would be created from a mi
extern "C" JL_DLLEXPORT_CODEGEN jl_code_info_t *jl_gdbdumpcode(jl_method_instance_t *mi)
|
|
@@ -2037,16 +2135,16 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, jl_
    dump->F = nullptr;
    dump->TSM = nullptr;
    if (src && jl_is_code_info(src)) {
        auto ctx = jl_ExecutionEngine->getContext();
        orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), *ctx);
        auto ctx = jl_ExecutionEngine->makeContext();
        orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), ctx);
        uint64_t compiler_start_time = 0;
        uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
        if (measure_compile_time_enabled)
            compiler_start_time = jl_hrtime();
        auto target_info = m.withModuleDo([&](Module &M) {
            return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple()));
        });
        jl_codegen_params_t output(*ctx, std::move(target_info.first), std::move(target_info.second));
        jl_codegen_params_t output(ctx, std::move(target_info.first), std::move(target_info.second));
        output.params = &params;
        output.imaging_mode = imaging_default();
        // This would be nice, but currently it causes some assembly regressions that make printed output
|
|