Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 131 additions & 33 deletions src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,12 +295,12 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_
jl_value_t *ci = cgparams.lookup(mi, world, world);
JL_GC_PROMISE_ROOTED(ci);
jl_code_instance_t *codeinst = NULL;
JL_GC_PUSH1(&codeinst);
if (ci != jl_nothing && jl_atomic_load_relaxed(&((jl_code_instance_t *)ci)->inferred) != jl_nothing) {
codeinst = (jl_code_instance_t*)ci;
}
else {
if (cgparams.lookup != jl_rettype_inferred_addr) {
// XXX: This will corrupt and leak a lot of memory which may be very bad
jl_error("Refusing to automatically run type inference with custom cache lookup.");
}
else {
Expand All @@ -309,15 +309,129 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_
* it into the cache here, since it was explicitly requested and is
* otherwise not reachable from anywhere in the system image.
*/
if (!jl_mi_cache_has_ci(mi, codeinst))
if (codeinst && !jl_mi_cache_has_ci(mi, codeinst)) {
JL_GC_PUSH1(&codeinst);
jl_mi_cache_insert(mi, codeinst);
JL_GC_POP();
}
}
}
JL_GC_POP();
return codeinst;
}

arraylist_t new_invokes;
typedef DenseMap<jl_code_instance_t*, std::pair<orc::ThreadSafeModule, jl_llvm_functions_t>> jl_compiled_functions_t;
// Drain the codegen workqueue held in `params`. For every (code instance, prototype) entry
// this looks up the code instance in `compiled_functions` (emitting it if it is missing),
// and then patches the prototype declaration that was emitted earlier so that it refers to
// the real function, to a specsig-to-fptr1 adapter, or to a jl-invoke trampoline.
//
// params             - codegen state; its `workqueue` is consumed here and refilled by the
//                      emission calls below
// policy             - together with `params.params->trim`, decides whether a code instance
//                      whose `inferred` field is `jl_nothing` is re-inferred
// compiled_functions - map from code instance to its (module, declaration names) pair;
//                      newly emitted functions are inserted into it
static void compile_workqueue(jl_codegen_params_t &params, CompilationPolicy policy, jl_compiled_functions_t &compiled_functions)
{
// move the pending items into a local queue, leaving params.workqueue empty
decltype(params.workqueue) workqueue;
std::swap(params.workqueue, workqueue);
jl_code_info_t *src = NULL;
jl_code_instance_t *codeinst = NULL;
// register src and codeinst with the GC frame (paired with JL_GC_POP at the end);
// src is not assigned anywhere else in this function
JL_GC_PUSH2(&src, &codeinst);
assert(!params.cache);
while (!workqueue.empty()) {
auto it = workqueue.pop_back_val();
codeinst = it.first;
auto &proto = it.second;
// try to emit code for this item from the workqueue
// the three values below stay empty/false when no compiled function is found
StringRef invokeName = "";
StringRef preal_decl = "";
bool preal_specsig = false;
{
// NOTE: this `it` shadows the workqueue item above; it is an iterator into compiled_functions
auto it = compiled_functions.find(codeinst);
if (it == compiled_functions.end()) {
// Reinfer the function. The JIT came along and removed the inferred
// method body. See #34993
if ((policy != CompilationPolicy::Default || params.params->trim) &&
jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) {
// XXX: SOURCE_MODE_FORCE_SOURCE is wrong here (neither sufficient nor necessary)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any hint w.r.t. to how this will be tightened up, or what the consequences are?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know yet, but I think it will involve moving more of this function into inference

codeinst = jl_type_infer(codeinst->def, jl_atomic_load_relaxed(&codeinst->max_world), SOURCE_MODE_FORCE_SOURCE);
}
// codeinst may have been replaced by the jl_type_infer result above; only emit when it is non-null
if (codeinst) {
orc::ThreadSafeModule result_m =
jl_create_ts_module(name_from_method_instance(codeinst->def),
params.tsctx, params.DL, params.TargetTriple);
auto decls = jl_emit_codeinst(result_m, codeinst, NULL, params);
// record the result only when result_m still tests true after emission
if (result_m)
it = compiled_functions.insert(std::make_pair(codeinst, std::make_pair(std::move(result_m), std::move(decls)))).first;
}
}
if (it != compiled_functions.end()) {
// derive the invoke name and, where applicable, the specialized declaration name
auto &decls = it->second.second;
invokeName = decls.functionObject;
if (decls.functionObject == "jl_fptr_args") {
preal_decl = decls.specFunctionObject;
}
else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call") {
preal_decl = decls.specFunctionObject;
preal_specsig = true;
}
}
}
// patch up the prototype we emitted earlier
Module *mod = proto.decl->getParent();
assert(proto.decl->isDeclaration());
Function *pinvoke = nullptr;
if (preal_decl.empty()) {
// no real declaration name is known, so route the call through a jl-invoke trampoline
if (invokeName.empty() && params.params->trim) {
// with trim enabled and no invoke name at all, report the method instance and abort
errs() << "Bailed out to invoke when compiling:";
jl_(codeinst->def);
abort();
}
pinvoke = emit_tojlinvoke(codeinst, invokeName, mod, params);
if (!proto.specsig)
proto.decl->replaceAllUsesWith(pinvoke);
}
// the prototype was declared with a specsig but the real function is not specsig
if (proto.specsig && !preal_specsig) {
// get or build an fptr1 that can invoke codeinst
if (pinvoke == nullptr)
pinvoke = get_or_emit_fptr1(preal_decl, mod);
// emit specsig-to-(jl)invoke conversion
proto.decl->setLinkage(GlobalVariable::InternalLinkage);
//protodecl->setAlwaysInline();
jl_init_function(proto.decl, params.TargetTriple);
jl_method_instance_t *mi = codeinst->def;
size_t nrealargs = jl_nparams(mi->specTypes); // number of actual arguments being passed
bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
// TODO: maybe this can be cached in codeinst->specfptr?
emit_specsig_to_fptr1(proto.decl, proto.cc, proto.return_roots, mi->specTypes, codeinst->rettype, is_opaque_closure, nrealargs, params, pinvoke, 0, 0);
preal_decl = ""; // no need to fixup the name
}
if (!preal_decl.empty()) {
// merge and/or rename this prototype to the real function
if (Value *specfun = mod->getNamedValue(preal_decl)) {
if (proto.decl != specfun)
proto.decl->replaceAllUsesWith(specfun);
}
else {
proto.decl->setName(preal_decl);
}
}
if (proto.oc) { // additionally, if we are dealing with an oc, then we might also need to fix up the fptr1 reference too
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just for my personal edification, and to get a sense of some of the affected interface here, could you explain what the invalid part looked like here:

The invalid way of doing this became much harder to express, which
exposes a lot of bugs (hits more assertion errors and causes more crashes).

in terms of code?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ABI is expressed as the tuple (mi.specTypes => codeinst.rettype), but the old code tried to use the wrong value for rettype by reimplementing a buggier version of the workqueue here. With the workqueue gone, that was no longer feasible.

assert(proto.specsig);
StringRef ocinvokeDecl = invokeName;
// if OC expected a specialized specsig dispatch, but we don't have it, use the inner trampoline here too
// XXX: this invoke translation logic is supposed to exactly match new_opaque_closure
// NOTE(review): pinvoke is only assigned in the two branches above. When preal_specsig is
// true and preal_decl was non-empty, neither branch ran and pinvoke is still nullptr, so
// this relies on invokeName never being "jl_fptr_interpret_call" or "jl_fptr_const_return"
// in that situation -- TODO confirm.
if (!preal_specsig || ocinvokeDecl == "jl_f_opaque_closure_call" || ocinvokeDecl == "jl_fptr_interpret_call" || ocinvokeDecl == "jl_fptr_const_return")
ocinvokeDecl = pinvoke->getName();
assert(!ocinvokeDecl.empty());
assert(ocinvokeDecl != "jl_fptr_args");
assert(ocinvokeDecl != "jl_fptr_sparam");
// merge and/or rename this prototype to the real function
if (Value *specfun = mod->getNamedValue(ocinvokeDecl)) {
if (proto.oc != specfun)
proto.oc->replaceAllUsesWith(specfun);
}
else {
proto.oc->setName(ocinvokeDecl);
}
}
// pick up any items that were queued on params while handling this entry, then reset
// params.workqueue so the loop keeps going until no work is left
workqueue.append(params.workqueue);
params.workqueue.clear();
}
JL_GC_POP();
}


// takes the running content that has collected in the shadow module and dump it to disk
// this builds the object file portion of the sysimage files for fast startup, and can
// also be used by external consumers like GPUCompiler.jl to obtain a module containing
Expand Down Expand Up @@ -346,7 +460,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
orc::ThreadSafeContext ctx;
orc::ThreadSafeModule backing;
if (!llvmmod) {
ctx = jl_ExecutionEngine->acquireContext();
ctx = jl_ExecutionEngine->makeContext();
backing = jl_create_ts_module("text", ctx);
}
orc::ThreadSafeModule &clone = llvmmod ? *unwrap(llvmmod) : backing;
Expand All @@ -367,11 +481,11 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
params.imaging_mode = imaging;
params.debug_level = cgparams->debug_info_level;
params.external_linkage = _external_linkage;
arraylist_new(&new_invokes, 0);
size_t compile_for[] = { jl_typeinf_world, _world };
int worlds = 0;
if (jl_options.trim != JL_TRIM_NO)
worlds = 1;
jl_compiled_functions_t compiled_functions;
for (; worlds < 2; worlds++) {
JL_TIMING(NATIVE_AOT, NATIVE_Codegen);
size_t this_world = compile_for[worlds];
Expand All @@ -391,7 +505,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
continue;
}
mi = (jl_method_instance_t*)item;
compile_mi:
src = NULL;
// if this method is generally visible to the current compilation world,
// and this is either the primary world, or not applicable in the primary world
Expand All @@ -406,7 +519,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
jl_(mi);
abort();
}
if (codeinst && !params.compiled_functions.count(codeinst) && !data->jl_fvar_map.count(codeinst)) {
if (codeinst && !compiled_functions.count(codeinst) && !data->jl_fvar_map.count(codeinst)) {
// now add it to our compilation results
// Const returns do not do codegen, but juliac inspects codegen results so make a dummy fvar entry to represent it
if (jl_options.trim != JL_TRIM_NO && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) {
Expand All @@ -418,7 +531,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
Triple(clone.getModuleUnlocked()->getTargetTriple()));
jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, NULL, params);
if (result_m)
params.compiled_functions[codeinst] = {std::move(result_m), std::move(decls)};
compiled_functions[codeinst] = {std::move(result_m), std::move(decls)};
else if (jl_options.trim != JL_TRIM_NO) {
// if we're building a small image, we need to compile everything
// to ensure that we have all the information we need.
Expand All @@ -428,26 +541,19 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
}
}
}
} else if (this_world != jl_typeinf_world) {
}
else if (this_world != jl_typeinf_world) {
/*
jl_safe_printf("Codegen could not find requested codeinstance to be compiled\n");
jl_(mi);
abort();
*/
}
// TODO: is goto the best way to do this?
jl_compile_workqueue(params, policy);
mi = (jl_method_instance_t*)arraylist_pop(&new_invokes);
if (mi != NULL) {
goto compile_mi;
}
}

// finally, make sure all referenced methods also get compiled or fixed up
jl_compile_workqueue(params, policy);
}
JL_GC_POP();
arraylist_free(&new_invokes);
// finally, make sure all referenced methods also get compiled or fixed up
compile_workqueue(params, policy, compiled_functions);

// process the globals array, before jl_merge_module destroys them
SmallVector<std::string, 0> gvars(params.global_targets.size());
Expand All @@ -464,7 +570,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
data->jl_value_to_llvm[idx] = global.first;
idx++;
}
CreateNativeMethods += params.compiled_functions.size();
CreateNativeMethods += compiled_functions.size();

size_t offset = gvars.size();
data->jl_external_to_llvm.resize(params.external_fns.size());
Expand All @@ -489,7 +595,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
{
JL_TIMING(NATIVE_AOT, NATIVE_Merge);
Linker L(*clone.getModuleUnlocked());
for (auto &def : params.compiled_functions) {
for (auto &def : compiled_functions) {
jl_merge_module(clone, std::move(std::get<0>(def.second)));
jl_code_instance_t *this_code = def.first;
jl_llvm_functions_t decls = std::get<1>(def.second);
Expand Down Expand Up @@ -573,9 +679,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
}
ct->reentrant_timing &= ~1ull;
}
if (ctx.getContext()) {
jl_ExecutionEngine->releaseContext(std::move(ctx));
}
return (void*)data;
}

Expand Down Expand Up @@ -1975,11 +2078,6 @@ void jl_dump_native_impl(void *native_code,
}
}

// Register the target-dependent analysis wrapper passes on `PM`: first the library-info
// wrapper built from `triple`, then the transform-info wrapper built from `analysis`.
void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis)
{
    auto *libraryInfoPass = new TargetLibraryInfoWrapperPass(triple);
    PM->add(libraryInfoPass);

    auto *transformInfoPass = createTargetTransformInfoWrapperPass(std::move(analysis));
    PM->add(transformInfoPass);
}

// sometimes in GDB you want to find out what code would be created from a mi
extern "C" JL_DLLEXPORT_CODEGEN jl_code_info_t *jl_gdbdumpcode(jl_method_instance_t *mi)
Expand Down Expand Up @@ -2037,16 +2135,16 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, jl_
dump->F = nullptr;
dump->TSM = nullptr;
if (src && jl_is_code_info(src)) {
auto ctx = jl_ExecutionEngine->getContext();
orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), *ctx);
auto ctx = jl_ExecutionEngine->makeContext();
orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), ctx);
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
auto target_info = m.withModuleDo([&](Module &M) {
return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple()));
});
jl_codegen_params_t output(*ctx, std::move(target_info.first), std::move(target_info.second));
jl_codegen_params_t output(ctx, std::move(target_info.first), std::move(target_info.second));
output.params = &params;
output.imaging_mode = imaging_default();
// This would be nice, but currently it causes some assembly regressions that make printed output
Expand Down
Loading