Skip to content

Commit ff9b970

Browse files
vtjnash authored and KristofferC committed
Split up the one big codegen lock into per-function locks and dependency edge tracking (#56179)
Disjoint content can now be LLVM-optimized in parallel, since codegen no longer has any ability to handle recursion, and compilation should even be able to run in parallel with the GC. Removes any remaining global state, since that is unsafe. Adds a C++ shim for concurrent GC support, used in conjunction with a `std::unique_lock` to DRY up the code. Fixes the RuntimeDyld implementation: since we use the ForwardingMemoryManager instead of making a new RTDyldMemoryManager object every time, we need to reference-count the finalizeMemory calls so that we only call it at the end of relocating everything, when everything is ready. We already conveniently have a shared_ptr here, so just use that instead of inventing a duplicate counter. Fixes many opaque closure (OC) bugs, including mostly fixing #55035, since this bug is just that much harder to express in the more constrained API.
1 parent c1092c9 commit ff9b970

17 files changed

+1336
-1025
lines changed

src/aotcompile.cpp

Lines changed: 131 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -295,12 +295,12 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_
295295
jl_value_t *ci = cgparams.lookup(mi, world, world);
296296
JL_GC_PROMISE_ROOTED(ci);
297297
jl_code_instance_t *codeinst = NULL;
298-
JL_GC_PUSH1(&codeinst);
299298
if (ci != jl_nothing && jl_atomic_load_relaxed(&((jl_code_instance_t *)ci)->inferred) != jl_nothing) {
300299
codeinst = (jl_code_instance_t*)ci;
301300
}
302301
else {
303302
if (cgparams.lookup != jl_rettype_inferred_addr) {
303+
// XXX: This will corrupt and leak a lot of memory which may be very bad
304304
jl_error("Refusing to automatically run type inference with custom cache lookup.");
305305
}
306306
else {
@@ -309,15 +309,129 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_
309309
* it into the cache here, since it was explicitly requested and is
310310
* otherwise not reachable from anywhere in the system image.
311311
*/
312-
if (!jl_mi_cache_has_ci(mi, codeinst))
312+
if (codeinst && !jl_mi_cache_has_ci(mi, codeinst)) {
313+
JL_GC_PUSH1(&codeinst);
313314
jl_mi_cache_insert(mi, codeinst);
315+
JL_GC_POP();
316+
}
314317
}
315318
}
316-
JL_GC_POP();
317319
return codeinst;
318320
}
319321

320-
arraylist_t new_invokes;
322+
typedef DenseMap<jl_code_instance_t*, std::pair<orc::ThreadSafeModule, jl_llvm_functions_t>> jl_compiled_functions_t;
323+
// Process every entry queued in `params.workqueue`. For each (codeinst, proto) pair this
// finds the already-compiled function for `codeinst` in `compiled_functions`, or emits it
// into a fresh module and records the result there, and then patches the prototype that
// was emitted earlier (`proto.decl`, plus `proto.oc` when set) so that it refers to the
// real function or to an invoke trampoline.
//
// Entries that get added to `params.workqueue` while an item is being processed are moved
// onto the local queue at the end of each iteration, so the loop runs until no work is left.
// `params.cache` must be unset (asserted below).
static void compile_workqueue(jl_codegen_params_t &params, CompilationPolicy policy, jl_compiled_functions_t &compiled_functions)
324+
{
325+
// take the pending items into a local queue; params.workqueue then only collects new work
decltype(params.workqueue) workqueue;
326+
std::swap(params.workqueue, workqueue);
327+
// NOTE(review): `src` is GC-rooted below but is never assigned or read in this function
jl_code_info_t *src = NULL;
328+
jl_code_instance_t *codeinst = NULL;
329+
JL_GC_PUSH2(&src, &codeinst);
330+
assert(!params.cache);
331+
while (!workqueue.empty()) {
332+
auto it = workqueue.pop_back_val();
333+
codeinst = it.first;
334+
auto &proto = it.second;
335+
// try to emit code for this item from the workqueue
336+
// names resolved for this item; they stay empty/false when no compiled function is found
StringRef invokeName = "";
337+
StringRef preal_decl = "";
338+
bool preal_specsig = false;
339+
{
340+
// this inner `it` (an iterator into compiled_functions) shadows the workqueue item above
auto it = compiled_functions.find(codeinst);
341+
if (it == compiled_functions.end()) {
342+
// Reinfer the function. The JIT came along and removed the inferred
343+
// method body. See #34993
344+
if ((policy != CompilationPolicy::Default || params.params->trim) &&
345+
jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) {
346+
// XXX: SOURCE_MODE_FORCE_SOURCE is wrong here (neither sufficient nor necessary)
347+
// NOTE(review): the result is NULL-checked just below, but later code in this
// iteration still uses codeinst (codeinst->def, emit_tojlinvoke(codeinst, ...)).
// Confirm a NULL result from jl_type_infer cannot reach those uses.
codeinst = jl_type_infer(codeinst->def, jl_atomic_load_relaxed(&codeinst->max_world), SOURCE_MODE_FORCE_SOURCE);
348+
}
349+
if (codeinst) {
350+
orc::ThreadSafeModule result_m =
351+
jl_create_ts_module(name_from_method_instance(codeinst->def),
352+
params.tsctx, params.DL, params.TargetTriple);
353+
auto decls = jl_emit_codeinst(result_m, codeinst, NULL, params);
354+
// only record the result when the module is still non-null after emission
if (result_m)
355+
it = compiled_functions.insert(std::make_pair(codeinst, std::make_pair(std::move(result_m), std::move(decls)))).first;
356+
}
357+
}
358+
if (it != compiled_functions.end()) {
359+
auto &decls = it->second.second;
360+
invokeName = decls.functionObject;
361+
// "jl_fptr_args": use specFunctionObject as the real declaration, without specsig
if (decls.functionObject == "jl_fptr_args") {
362+
preal_decl = decls.specFunctionObject;
363+
}
364+
// any other name except "jl_fptr_sparam" / "jl_f_opaque_closure_call": treated as specsig
else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call") {
365+
preal_decl = decls.specFunctionObject;
366+
preal_specsig = true;
367+
}
368+
}
369+
}
370+
// patch up the prototype we emitted earlier
371+
Module *mod = proto.decl->getParent();
372+
assert(proto.decl->isDeclaration());
373+
Function *pinvoke = nullptr;
374+
// no real declaration to link against: route the call through a tojlinvoke trampoline
if (preal_decl.empty()) {
375+
// with trim enabled, having no invoke name at all is fatal: print the method and abort
if (invokeName.empty() && params.params->trim) {
376+
errs() << "Bailed out to invoke when compiling:";
377+
jl_(codeinst->def);
378+
abort();
379+
}
380+
pinvoke = emit_tojlinvoke(codeinst, invokeName, mod, params);
381+
if (!proto.specsig)
382+
proto.decl->replaceAllUsesWith(pinvoke);
383+
}
384+
// the prototype expects a specsig but the real function does not provide one
if (proto.specsig && !preal_specsig) {
385+
// get or build an fptr1 that can invoke codeinst
386+
if (pinvoke == nullptr)
387+
pinvoke = get_or_emit_fptr1(preal_decl, mod);
388+
// emit specsig-to-(jl)invoke conversion
389+
proto.decl->setLinkage(GlobalVariable::InternalLinkage);
390+
//protodecl->setAlwaysInline();
391+
jl_init_function(proto.decl, params.TargetTriple);
392+
jl_method_instance_t *mi = codeinst->def;
393+
size_t nrealargs = jl_nparams(mi->specTypes); // number of actual arguments being passed
394+
bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
395+
// TODO: maybe this can be cached in codeinst->specfptr?
396+
emit_specsig_to_fptr1(proto.decl, proto.cc, proto.return_roots, mi->specTypes, codeinst->rettype, is_opaque_closure, nrealargs, params, pinvoke, 0, 0);
397+
preal_decl = ""; // no need to fixup the name
398+
}
399+
if (!preal_decl.empty()) {
400+
// merge and/or rename this prototype to the real function
401+
if (Value *specfun = mod->getNamedValue(preal_decl)) {
402+
if (proto.decl != specfun)
403+
proto.decl->replaceAllUsesWith(specfun);
404+
}
405+
else {
406+
proto.decl->setName(preal_decl);
407+
}
408+
}
409+
if (proto.oc) { // additionally, if we are dealing with an oc, then we might also need to fix up the fptr1 reference too
410+
assert(proto.specsig);
411+
StringRef ocinvokeDecl = invokeName;
412+
// if OC expected a specialized specsig dispatch, but we don't have it, use the inner trampoline here too
413+
// XXX: this invoke translation logic is supposed to exactly match new_opaque_closure
414+
// NOTE(review): pinvoke is only non-null if one of the two branches above assigned it.
// Presumably the "jl_fptr_interpret_call" / "jl_fptr_const_return" names never occur
// together with a null pinvoke; confirm.
if (!preal_specsig || ocinvokeDecl == "jl_f_opaque_closure_call" || ocinvokeDecl == "jl_fptr_interpret_call" || ocinvokeDecl == "jl_fptr_const_return")
415+
ocinvokeDecl = pinvoke->getName();
416+
assert(!ocinvokeDecl.empty());
417+
assert(ocinvokeDecl != "jl_fptr_args");
418+
assert(ocinvokeDecl != "jl_fptr_sparam");
419+
// merge and/or rename this prototype to the real function
420+
if (Value *specfun = mod->getNamedValue(ocinvokeDecl)) {
421+
if (proto.oc != specfun)
422+
proto.oc->replaceAllUsesWith(specfun);
423+
}
424+
else {
425+
proto.oc->setName(ocinvokeDecl);
426+
}
427+
}
428+
// pick up any work that was queued in params.workqueue while handling this item
workqueue.append(params.workqueue);
429+
params.workqueue.clear();
430+
}
431+
JL_GC_POP();
432+
}
433+
434+
321435
// takes the running content that has collected in the shadow module and dump it to disk
322436
// this builds the object file portion of the sysimage files for fast startup, and can
323437
// also be used by external consumers like GPUCompiler.jl to obtain a module containing
@@ -346,7 +460,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
346460
orc::ThreadSafeContext ctx;
347461
orc::ThreadSafeModule backing;
348462
if (!llvmmod) {
349-
ctx = jl_ExecutionEngine->acquireContext();
463+
ctx = jl_ExecutionEngine->makeContext();
350464
backing = jl_create_ts_module("text", ctx);
351465
}
352466
orc::ThreadSafeModule &clone = llvmmod ? *unwrap(llvmmod) : backing;
@@ -367,11 +481,11 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
367481
params.imaging_mode = imaging;
368482
params.debug_level = cgparams->debug_info_level;
369483
params.external_linkage = _external_linkage;
370-
arraylist_new(&new_invokes, 0);
371484
size_t compile_for[] = { jl_typeinf_world, _world };
372485
int worlds = 0;
373486
if (jl_options.trim != JL_TRIM_NO)
374487
worlds = 1;
488+
jl_compiled_functions_t compiled_functions;
375489
for (; worlds < 2; worlds++) {
376490
JL_TIMING(NATIVE_AOT, NATIVE_Codegen);
377491
size_t this_world = compile_for[worlds];
@@ -391,7 +505,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
391505
continue;
392506
}
393507
mi = (jl_method_instance_t*)item;
394-
compile_mi:
395508
src = NULL;
396509
// if this method is generally visible to the current compilation world,
397510
// and this is either the primary world, or not applicable in the primary world
@@ -406,7 +519,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
406519
jl_(mi);
407520
abort();
408521
}
409-
if (codeinst && !params.compiled_functions.count(codeinst) && !data->jl_fvar_map.count(codeinst)) {
522+
if (codeinst && !compiled_functions.count(codeinst) && !data->jl_fvar_map.count(codeinst)) {
410523
// now add it to our compilation results
411524
// Const returns do not do codegen, but juliac inspects codegen results so make a dummy fvar entry to represent it
412525
if (jl_options.trim != JL_TRIM_NO && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) {
@@ -418,7 +531,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
418531
Triple(clone.getModuleUnlocked()->getTargetTriple()));
419532
jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, NULL, params);
420533
if (result_m)
421-
params.compiled_functions[codeinst] = {std::move(result_m), std::move(decls)};
534+
compiled_functions[codeinst] = {std::move(result_m), std::move(decls)};
422535
else if (jl_options.trim != JL_TRIM_NO) {
423536
// if we're building a small image, we need to compile everything
424537
// to ensure that we have all the information we need.
@@ -428,26 +541,19 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
428541
}
429542
}
430543
}
431-
} else if (this_world != jl_typeinf_world) {
544+
}
545+
else if (this_world != jl_typeinf_world) {
432546
/*
433547
jl_safe_printf("Codegen could not find requested codeinstance to be compiled\n");
434548
jl_(mi);
435549
abort();
436550
*/
437551
}
438-
// TODO: is goto the best way to do this?
439-
jl_compile_workqueue(params, policy);
440-
mi = (jl_method_instance_t*)arraylist_pop(&new_invokes);
441-
if (mi != NULL) {
442-
goto compile_mi;
443-
}
444552
}
445-
446-
// finally, make sure all referenced methods also get compiled or fixed up
447-
jl_compile_workqueue(params, policy);
448553
}
449554
JL_GC_POP();
450-
arraylist_free(&new_invokes);
555+
// finally, make sure all referenced methods also get compiled or fixed up
556+
compile_workqueue(params, policy, compiled_functions);
451557

452558
// process the globals array, before jl_merge_module destroys them
453559
SmallVector<std::string, 0> gvars(params.global_targets.size());
@@ -464,7 +570,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
464570
data->jl_value_to_llvm[idx] = global.first;
465571
idx++;
466572
}
467-
CreateNativeMethods += params.compiled_functions.size();
573+
CreateNativeMethods += compiled_functions.size();
468574

469575
size_t offset = gvars.size();
470576
data->jl_external_to_llvm.resize(params.external_fns.size());
@@ -489,7 +595,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
489595
{
490596
JL_TIMING(NATIVE_AOT, NATIVE_Merge);
491597
Linker L(*clone.getModuleUnlocked());
492-
for (auto &def : params.compiled_functions) {
598+
for (auto &def : compiled_functions) {
493599
jl_merge_module(clone, std::move(std::get<0>(def.second)));
494600
jl_code_instance_t *this_code = def.first;
495601
jl_llvm_functions_t decls = std::get<1>(def.second);
@@ -573,9 +679,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
573679
}
574680
ct->reentrant_timing &= ~1ull;
575681
}
576-
if (ctx.getContext()) {
577-
jl_ExecutionEngine->releaseContext(std::move(ctx));
578-
}
579682
return (void*)data;
580683
}
581684

@@ -1975,11 +2078,6 @@ void jl_dump_native_impl(void *native_code,
19752078
}
19762079
}
19772080

1978-
// Adds two target-specific wrapper passes to the legacy pass manager `PM`:
// a TargetLibraryInfoWrapperPass constructed from `triple`, and the pass returned by
// createTargetTransformInfoWrapperPass for `analysis` (which is moved from).
void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis)
1979-
{
1980-
PM->add(new TargetLibraryInfoWrapperPass(triple));
1981-
PM->add(createTargetTransformInfoWrapperPass(std::move(analysis)));
1982-
}
19832081

19842082
// sometimes in GDB you want to find out what code would be created from a mi
19852083
extern "C" JL_DLLEXPORT_CODEGEN jl_code_info_t *jl_gdbdumpcode(jl_method_instance_t *mi)
@@ -2037,16 +2135,16 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, jl_
20372135
dump->F = nullptr;
20382136
dump->TSM = nullptr;
20392137
if (src && jl_is_code_info(src)) {
2040-
auto ctx = jl_ExecutionEngine->getContext();
2041-
orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), *ctx);
2138+
auto ctx = jl_ExecutionEngine->makeContext();
2139+
orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), ctx);
20422140
uint64_t compiler_start_time = 0;
20432141
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
20442142
if (measure_compile_time_enabled)
20452143
compiler_start_time = jl_hrtime();
20462144
auto target_info = m.withModuleDo([&](Module &M) {
20472145
return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple()));
20482146
});
2049-
jl_codegen_params_t output(*ctx, std::move(target_info.first), std::move(target_info.second));
2147+
jl_codegen_params_t output(ctx, std::move(target_info.first), std::move(target_info.second));
20502148
output.params = &params;
20512149
output.imaging_mode = imaging_default();
20522150
// This would be nice, but currently it causes some assembly regressions that make printed output

0 commit comments

Comments
 (0)