Split up the one big codegen lock into per-function locks and dependency edge tracking #56179
Changes from all commits: ddd8439, f0ab810, 8612b9d, d4956ef, 557f16f, 9885eaf
@@ -295,12 +295,12 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_
    jl_value_t *ci = cgparams.lookup(mi, world, world);
    JL_GC_PROMISE_ROOTED(ci);
    jl_code_instance_t *codeinst = NULL;
    JL_GC_PUSH1(&codeinst);
    if (ci != jl_nothing && jl_atomic_load_relaxed(&((jl_code_instance_t *)ci)->inferred) != jl_nothing) {
        codeinst = (jl_code_instance_t*)ci;
    }
    else {
        if (cgparams.lookup != jl_rettype_inferred_addr) {
            // XXX: This will corrupt and leak a lot of memory which may be very bad
            jl_error("Refusing to automatically run type inference with custom cache lookup.");
        }
        else {
|
|
@@ -309,15 +309,129 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_
             * it into the cache here, since it was explicitly requested and is
             * otherwise not reachable from anywhere in the system image.
             */
            if (!jl_mi_cache_has_ci(mi, codeinst))
            if (codeinst && !jl_mi_cache_has_ci(mi, codeinst)) {
                JL_GC_PUSH1(&codeinst);
                jl_mi_cache_insert(mi, codeinst);
                JL_GC_POP();
            }
        }
    }
    JL_GC_POP();
    return codeinst;
}
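An aside for readers unfamiliar with the rooting macros this hunk moves around: JL_GC_PUSH1 registers a stack slot with the GC so whatever is stored in it stays alive across allocating calls, and JL_GC_POP removes that registration. A minimal sketch of the idiom, with a hypothetical allocating helper standing in for the lookup/inference calls above:

    // Sketch of the JL_GC_PUSH1/JL_GC_POP rooting idiom. do_allocating_lookup
    // is a hypothetical placeholder, not a function from this PR.
    static void example_rooting(jl_method_instance_t *mi)
    {
        jl_code_instance_t *codeinst = NULL;
        JL_GC_PUSH1(&codeinst);            // root the local so the GC can see it
        codeinst = do_allocating_lookup(mi);
        jl_mi_cache_insert(mi, codeinst);  // codeinst stays rooted across this call
        JL_GC_POP();                       // must pair with the PUSH in the same scope
    }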
|
|
arraylist_t new_invokes;
typedef DenseMap<jl_code_instance_t*, std::pair<orc::ThreadSafeModule, jl_llvm_functions_t>> jl_compiled_functions_t;
static void compile_workqueue(jl_codegen_params_t &params, CompilationPolicy policy, jl_compiled_functions_t &compiled_functions)
{
    decltype(params.workqueue) workqueue;
    std::swap(params.workqueue, workqueue);
    jl_code_info_t *src = NULL;
    jl_code_instance_t *codeinst = NULL;
    JL_GC_PUSH2(&src, &codeinst);
    assert(!params.cache);
    while (!workqueue.empty()) {
        auto it = workqueue.pop_back_val();
        codeinst = it.first;
        auto &proto = it.second;
        // try to emit code for this item from the workqueue
        StringRef invokeName = "";
        StringRef preal_decl = "";
        bool preal_specsig = false;
        {
            auto it = compiled_functions.find(codeinst);
            if (it == compiled_functions.end()) {
                // Reinfer the function. The JIT came along and removed the inferred
                // method body. See #34993
                if ((policy != CompilationPolicy::Default || params.params->trim) &&
                    jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) {
                    // XXX: SOURCE_MODE_FORCE_SOURCE is wrong here (neither sufficient nor necessary)
|
Review comment: Any hint w.r.t. how this will be tightened up, or what the consequences are?
Reply: I don't know yet, but I think it will involve moving more of this function into inference.
                    codeinst = jl_type_infer(codeinst->def, jl_atomic_load_relaxed(&codeinst->max_world), SOURCE_MODE_FORCE_SOURCE);
                }
                if (codeinst) {
                    orc::ThreadSafeModule result_m =
                        jl_create_ts_module(name_from_method_instance(codeinst->def),
                                            params.tsctx, params.DL, params.TargetTriple);
                    auto decls = jl_emit_codeinst(result_m, codeinst, NULL, params);
                    if (result_m)
                        it = compiled_functions.insert(std::make_pair(codeinst, std::make_pair(std::move(result_m), std::move(decls)))).first;
                }
            }
            if (it != compiled_functions.end()) {
                auto &decls = it->second.second;
                invokeName = decls.functionObject;
                if (decls.functionObject == "jl_fptr_args") {
                    preal_decl = decls.specFunctionObject;
                }
                else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call") {
                    preal_decl = decls.specFunctionObject;
                    preal_specsig = true;
                }
            }
        }
        // patch up the prototype we emitted earlier
        Module *mod = proto.decl->getParent();
        assert(proto.decl->isDeclaration());
        Function *pinvoke = nullptr;
        if (preal_decl.empty()) {
            if (invokeName.empty() && params.params->trim) {
                errs() << "Bailed out to invoke when compiling:";
                jl_(codeinst->def);
                abort();
            }
            pinvoke = emit_tojlinvoke(codeinst, invokeName, mod, params);
            if (!proto.specsig)
                proto.decl->replaceAllUsesWith(pinvoke);
        }
        if (proto.specsig && !preal_specsig) {
            // get or build an fptr1 that can invoke codeinst
            if (pinvoke == nullptr)
                pinvoke = get_or_emit_fptr1(preal_decl, mod);
            // emit specsig-to-(jl)invoke conversion
            proto.decl->setLinkage(GlobalVariable::InternalLinkage);
            //protodecl->setAlwaysInline();
            jl_init_function(proto.decl, params.TargetTriple);
            jl_method_instance_t *mi = codeinst->def;
            size_t nrealargs = jl_nparams(mi->specTypes); // number of actual arguments being passed
            bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
            // TODO: maybe this can be cached in codeinst->specfptr?
            emit_specsig_to_fptr1(proto.decl, proto.cc, proto.return_roots, mi->specTypes, codeinst->rettype, is_opaque_closure, nrealargs, params, pinvoke, 0, 0);
            preal_decl = ""; // no need to fixup the name
        }
        if (!preal_decl.empty()) {
            // merge and/or rename this prototype to the real function
            if (Value *specfun = mod->getNamedValue(preal_decl)) {
                if (proto.decl != specfun)
                    proto.decl->replaceAllUsesWith(specfun);
            }
            else {
                proto.decl->setName(preal_decl);
            }
        }
        if (proto.oc) { // additionally, if we are dealing with an oc, then we might also need to fix up the fptr1 reference too
|
Review comment: Just for my personal edification, and to get a sense of some of the affected interface here, could you explain what the invalid part looked like here, in terms of code?
Reply: The ABI is expressed as the tuple (mi.specTypes => codeinst.rettype), but the old code tried to use the wrong value for rettype by reimplementing a buggier version of the workqueue here. With the workqueue gone, that was no longer feasible.
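A minimal illustration of the point in that reply, using a hypothetical type name (SpecsigABI is not a type in this PR): the specsig calling convention is pinned down by the pair of argument types and inferred return type, so both values have to come from the same code instance.

    // Hypothetical sketch, not code from this PR (types come from julia.h):
    // the pair that fixes the specsig ABI of a compiled code instance.
    // Substituting a guessed rettype, as the old workqueue reimplementation
    // effectively did, yields a mismatched calling convention.
    struct SpecsigABI {
        jl_value_t *specTypes; // mi->specTypes: tuple type of the actual arguments
        jl_value_t *rettype;   // codeinst->rettype: the inferred return type
    };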
            assert(proto.specsig);
            StringRef ocinvokeDecl = invokeName;
            // if OC expected a specialized specsig dispatch, but we don't have it, use the inner trampoline here too
            // XXX: this invoke translation logic is supposed to exactly match new_opaque_closure
            if (!preal_specsig || ocinvokeDecl == "jl_f_opaque_closure_call" || ocinvokeDecl == "jl_fptr_interpret_call" || ocinvokeDecl == "jl_fptr_const_return")
                ocinvokeDecl = pinvoke->getName();
            assert(!ocinvokeDecl.empty());
            assert(ocinvokeDecl != "jl_fptr_args");
            assert(ocinvokeDecl != "jl_fptr_sparam");
            // merge and/or rename this prototype to the real function
            if (Value *specfun = mod->getNamedValue(ocinvokeDecl)) {
                if (proto.oc != specfun)
                    proto.oc->replaceAllUsesWith(specfun);
            }
            else {
                proto.oc->setName(ocinvokeDecl);
            }
        }
        workqueue.append(params.workqueue);
        params.workqueue.clear();
    }
    JL_GC_POP();
}
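Note the last two statements of the loop body: emitting code for one prototype can push newly referenced callees onto params.workqueue, so the function folds them back into its local queue and keeps going until nothing new appears. A generic sketch of that drain-and-refill idiom (plain C++, not the PR's actual types), in case the shape of the loop is unfamiliar:

    #include <iterator>
    #include <utility>
    #include <vector>

    // Generic sketch of the drain-and-refill idiom used by compile_workqueue:
    // processing one item may enqueue more items onto the shared queue, so keep
    // looping until both the local and the shared queue are empty.
    template <typename Item, typename ProcessFn>
    void drain_workqueue(std::vector<Item> &shared, ProcessFn process)
    {
        std::vector<Item> local;
        std::swap(shared, local);      // take ownership of the pending items
        while (!local.empty()) {
            Item item = std::move(local.back());
            local.pop_back();
            process(item);             // may append new items to `shared`
            // fold any newly discovered work back into the local queue
            local.insert(local.end(),
                         std::make_move_iterator(shared.begin()),
                         std::make_move_iterator(shared.end()));
            shared.clear();
        }
    }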
|
|
// takes the running content that has collected in the shadow module and dump it to disk
// this builds the object file portion of the sysimage files for fast startup, and can
// also be used be extern consumers like GPUCompiler.jl to obtain a module containing
|
|
@@ -346,7 +460,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
    orc::ThreadSafeContext ctx;
    orc::ThreadSafeModule backing;
    if (!llvmmod) {
        ctx = jl_ExecutionEngine->acquireContext();
        ctx = jl_ExecutionEngine->makeContext();
        backing = jl_create_ts_module("text", ctx);
    }
    orc::ThreadSafeModule &clone = llvmmod ? *unwrap(llvmmod) : backing;
|
|
@@ -367,11 +481,11 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
    params.imaging_mode = imaging;
    params.debug_level = cgparams->debug_info_level;
    params.external_linkage = _external_linkage;
    arraylist_new(&new_invokes, 0);
    size_t compile_for[] = { jl_typeinf_world, _world };
    int worlds = 0;
    if (jl_options.trim != JL_TRIM_NO)
        worlds = 1;
    jl_compiled_functions_t compiled_functions;
    for (; worlds < 2; worlds++) {
        JL_TIMING(NATIVE_AOT, NATIVE_Codegen);
        size_t this_world = compile_for[worlds];
|
|
@@ -391,7 +505,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
                continue;
            }
            mi = (jl_method_instance_t*)item;
    compile_mi:
            src = NULL;
            // if this method is generally visible to the current compilation world,
            // and this is either the primary world, or not applicable in the primary world
|
|
@@ -406,7 +519,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
                jl_(mi);
                abort();
            }
            if (codeinst && !params.compiled_functions.count(codeinst) && !data->jl_fvar_map.count(codeinst)) {
            if (codeinst && !compiled_functions.count(codeinst) && !data->jl_fvar_map.count(codeinst)) {
                // now add it to our compilation results
                // Const returns do not do codegen, but juliac inspects codegen results so make a dummy fvar entry to represent it
                if (jl_options.trim != JL_TRIM_NO && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) {
|
|
@@ -418,7 +531,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
                        Triple(clone.getModuleUnlocked()->getTargetTriple()));
                    jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, NULL, params);
                    if (result_m)
                        params.compiled_functions[codeinst] = {std::move(result_m), std::move(decls)};
                        compiled_functions[codeinst] = {std::move(result_m), std::move(decls)};
                    else if (jl_options.trim != JL_TRIM_NO) {
                        // if we're building a small image, we need to compile everything
                        // to ensure that we have all the information we need.
|
|
@@ -428,26 +541,19 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
                    }
                }
            }
        } else if (this_world != jl_typeinf_world) {
        }
        else if (this_world != jl_typeinf_world) {
            /*
            jl_safe_printf("Codegen could not find requested codeinstance to be compiled\n");
            jl_(mi);
            abort();
            */
        }
        // TODO: is goto the best way to do this?
        jl_compile_workqueue(params, policy);
        mi = (jl_method_instance_t*)arraylist_pop(&new_invokes);
        if (mi != NULL) {
            goto compile_mi;
        }
    }
|
|
        // finally, make sure all referenced methods also get compiled or fixed up
        jl_compile_workqueue(params, policy);
    }
    JL_GC_POP();
    arraylist_free(&new_invokes);
    // finally, make sure all referenced methods also get compiled or fixed up
    compile_workqueue(params, policy, compiled_functions);
|
|
    // process the globals array, before jl_merge_module destroys them
    SmallVector<std::string, 0> gvars(params.global_targets.size());
|
|
@@ -464,7 +570,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
        data->jl_value_to_llvm[idx] = global.first;
        idx++;
    }
    CreateNativeMethods += params.compiled_functions.size();
    CreateNativeMethods += compiled_functions.size();
|
|
    size_t offset = gvars.size();
    data->jl_external_to_llvm.resize(params.external_fns.size());
|
|
@@ -489,7 +595,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
    {
        JL_TIMING(NATIVE_AOT, NATIVE_Merge);
        Linker L(*clone.getModuleUnlocked());
        for (auto &def : params.compiled_functions) {
        for (auto &def : compiled_functions) {
            jl_merge_module(clone, std::move(std::get<0>(def.second)));
            jl_code_instance_t *this_code = def.first;
            jl_llvm_functions_t decls = std::get<1>(def.second);
|
|
@@ -573,9 +679,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
        }
        ct->reentrant_timing &= ~1ull;
    }
    if (ctx.getContext()) {
        jl_ExecutionEngine->releaseContext(std::move(ctx));
    }
    return (void*)data;
}
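With the acquireContext/releaseContext pairing gone, each caller now obtains a fresh context from makeContext and simply lets it be destroyed when the last reference drops, so there is no pool to hand it back to. A rough sketch of what a makeContext-style helper amounts to in ORC terms; this is an assumption about its shape, not the PR's actual implementation:

    #include <memory>
    #include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h" // declares ThreadSafeContext
    #include "llvm/IR/LLVMContext.h"

    // Assumed shape of a makeContext()-style helper: wrap a brand-new LLVMContext
    // in an orc::ThreadSafeContext. The underlying context is shared through
    // ThreadSafeContext copies, so no explicit release call is required.
    llvm::orc::ThreadSafeContext makeFreshContext()
    {
        return llvm::orc::ThreadSafeContext(std::make_unique<llvm::LLVMContext>());
    }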
|
|
|
|
@@ -1975,11 +2078,6 @@ void jl_dump_native_impl(void *native_code,
        }
    }
|
|
void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis)
{
    PM->add(new TargetLibraryInfoWrapperPass(triple));
    PM->add(createTargetTransformInfoWrapperPass(std::move(analysis)));
}
|
|
// sometimes in GDB you want to find out what code would be created from a mi
extern "C" JL_DLLEXPORT_CODEGEN jl_code_info_t *jl_gdbdumpcode(jl_method_instance_t *mi)
|
|
@@ -2037,16 +2135,16 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, jl_
    dump->F = nullptr;
    dump->TSM = nullptr;
    if (src && jl_is_code_info(src)) {
        auto ctx = jl_ExecutionEngine->getContext();
        orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), *ctx);
        auto ctx = jl_ExecutionEngine->makeContext();
        orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), ctx);
        uint64_t compiler_start_time = 0;
        uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
        if (measure_compile_time_enabled)
            compiler_start_time = jl_hrtime();
        auto target_info = m.withModuleDo([&](Module &M) {
            return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple()));
        });
        jl_codegen_params_t output(*ctx, std::move(target_info.first), std::move(target_info.second));
        jl_codegen_params_t output(ctx, std::move(target_info.first), std::move(target_info.second));
        output.params = &params;
        output.imaging_mode = imaging_default();
        // This would be nice, but currently it causes some assembly regressions that make printed output
|
|