@@ -295,12 +295,12 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_
295295 jl_value_t *ci = cgparams.lookup (mi, world, world);
296296 JL_GC_PROMISE_ROOTED (ci);
297297 jl_code_instance_t *codeinst = NULL ;
298- JL_GC_PUSH1 (&codeinst);
299298 if (ci != jl_nothing && jl_atomic_load_relaxed (&((jl_code_instance_t *)ci)->inferred ) != jl_nothing) {
300299 codeinst = (jl_code_instance_t *)ci;
301300 }
302301 else {
303302 if (cgparams.lookup != jl_rettype_inferred_addr) {
303+ // XXX: This will corrupt and leak a lot of memory which may be very bad
304304 jl_error (" Refusing to automatically run type inference with custom cache lookup." );
305305 }
306306 else {
@@ -309,15 +309,129 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_
309309 * it into the cache here, since it was explicitly requested and is
310310 * otherwise not reachable from anywhere in the system image.
311311 */
312- if (!jl_mi_cache_has_ci (mi, codeinst))
312+ if (codeinst && !jl_mi_cache_has_ci (mi, codeinst)) {
313+ JL_GC_PUSH1 (&codeinst);
313314 jl_mi_cache_insert (mi, codeinst);
315+ JL_GC_POP ();
316+ }
314317 }
315318 }
316- JL_GC_POP ();
317319 return codeinst;
318320}
319321
320- arraylist_t new_invokes;
322+ typedef DenseMap<jl_code_instance_t *, std::pair<orc::ThreadSafeModule, jl_llvm_functions_t >> jl_compiled_functions_t ;
323+ static void compile_workqueue (jl_codegen_params_t ¶ms, CompilationPolicy policy, jl_compiled_functions_t &compiled_functions)
324+ {
325+ decltype (params.workqueue ) workqueue;
326+ std::swap (params.workqueue , workqueue);
327+ jl_code_info_t *src = NULL ;
328+ jl_code_instance_t *codeinst = NULL ;
329+ JL_GC_PUSH2 (&src, &codeinst);
330+ assert (!params.cache );
331+ while (!workqueue.empty ()) {
332+ auto it = workqueue.pop_back_val ();
333+ codeinst = it.first ;
334+ auto &proto = it.second ;
335+ // try to emit code for this item from the workqueue
336+ StringRef invokeName = " " ;
337+ StringRef preal_decl = " " ;
338+ bool preal_specsig = false ;
339+ {
340+ auto it = compiled_functions.find (codeinst);
341+ if (it == compiled_functions.end ()) {
342+ // Reinfer the function. The JIT came along and removed the inferred
343+ // method body. See #34993
344+ if ((policy != CompilationPolicy::Default || params.params ->trim ) &&
345+ jl_atomic_load_relaxed (&codeinst->inferred ) == jl_nothing) {
346+ // XXX: SOURCE_MODE_FORCE_SOURCE is wrong here (neither sufficient nor necessary)
347+ codeinst = jl_type_infer (codeinst->def , jl_atomic_load_relaxed (&codeinst->max_world ), SOURCE_MODE_FORCE_SOURCE);
348+ }
349+ if (codeinst) {
350+ orc::ThreadSafeModule result_m =
351+ jl_create_ts_module (name_from_method_instance (codeinst->def ),
352+ params.tsctx , params.DL , params.TargetTriple );
353+ auto decls = jl_emit_codeinst (result_m, codeinst, NULL , params);
354+ if (result_m)
355+ it = compiled_functions.insert (std::make_pair (codeinst, std::make_pair (std::move (result_m), std::move (decls)))).first ;
356+ }
357+ }
358+ if (it != compiled_functions.end ()) {
359+ auto &decls = it->second .second ;
360+ invokeName = decls.functionObject ;
361+ if (decls.functionObject == " jl_fptr_args" ) {
362+ preal_decl = decls.specFunctionObject ;
363+ }
364+ else if (decls.functionObject != " jl_fptr_sparam" && decls.functionObject != " jl_f_opaque_closure_call" ) {
365+ preal_decl = decls.specFunctionObject ;
366+ preal_specsig = true ;
367+ }
368+ }
369+ }
370+ // patch up the prototype we emitted earlier
371+ Module *mod = proto.decl ->getParent ();
372+ assert (proto.decl ->isDeclaration ());
373+ Function *pinvoke = nullptr ;
374+ if (preal_decl.empty ()) {
375+ if (invokeName.empty () && params.params ->trim ) {
376+ errs () << " Bailed out to invoke when compiling:" ;
377+ jl_ (codeinst->def );
378+ abort ();
379+ }
380+ pinvoke = emit_tojlinvoke (codeinst, invokeName, mod, params);
381+ if (!proto.specsig )
382+ proto.decl ->replaceAllUsesWith (pinvoke);
383+ }
384+ if (proto.specsig && !preal_specsig) {
385+ // get or build an fptr1 that can invoke codeinst
386+ if (pinvoke == nullptr )
387+ pinvoke = get_or_emit_fptr1 (preal_decl, mod);
388+ // emit specsig-to-(jl)invoke conversion
389+ proto.decl ->setLinkage (GlobalVariable::InternalLinkage);
390+ // protodecl->setAlwaysInline();
391+ jl_init_function (proto.decl , params.TargetTriple );
392+ jl_method_instance_t *mi = codeinst->def ;
393+ size_t nrealargs = jl_nparams (mi->specTypes ); // number of actual arguments being passed
394+ bool is_opaque_closure = jl_is_method (mi->def .value ) && mi->def .method ->is_for_opaque_closure ;
395+ // TODO: maybe this can be cached in codeinst->specfptr?
396+ emit_specsig_to_fptr1 (proto.decl , proto.cc , proto.return_roots , mi->specTypes , codeinst->rettype , is_opaque_closure, nrealargs, params, pinvoke, 0 , 0 );
397+ preal_decl = " " ; // no need to fixup the name
398+ }
399+ if (!preal_decl.empty ()) {
400+ // merge and/or rename this prototype to the real function
401+ if (Value *specfun = mod->getNamedValue (preal_decl)) {
402+ if (proto.decl != specfun)
403+ proto.decl ->replaceAllUsesWith (specfun);
404+ }
405+ else {
406+ proto.decl ->setName (preal_decl);
407+ }
408+ }
409+ if (proto.oc ) { // additionally, if we are dealing with an oc, then we might also need to fix up the fptr1 reference too
410+ assert (proto.specsig );
411+ StringRef ocinvokeDecl = invokeName;
412+ // if OC expected a specialized specsig dispatch, but we don't have it, use the inner trampoline here too
413+ // XXX: this invoke translation logic is supposed to exactly match new_opaque_closure
414+ if (!preal_specsig || ocinvokeDecl == " jl_f_opaque_closure_call" || ocinvokeDecl == " jl_fptr_interpret_call" || ocinvokeDecl == " jl_fptr_const_return" )
415+ ocinvokeDecl = pinvoke->getName ();
416+ assert (!ocinvokeDecl.empty ());
417+ assert (ocinvokeDecl != " jl_fptr_args" );
418+ assert (ocinvokeDecl != " jl_fptr_sparam" );
419+ // merge and/or rename this prototype to the real function
420+ if (Value *specfun = mod->getNamedValue (ocinvokeDecl)) {
421+ if (proto.oc != specfun)
422+ proto.oc ->replaceAllUsesWith (specfun);
423+ }
424+ else {
425+ proto.oc ->setName (ocinvokeDecl);
426+ }
427+ }
428+ workqueue.append (params.workqueue );
429+ params.workqueue .clear ();
430+ }
431+ JL_GC_POP ();
432+ }
433+
434+
321435// takes the running content that has collected in the shadow module and dump it to disk
322436// this builds the object file portion of the sysimage files for fast startup, and can
323437// also be used be extern consumers like GPUCompiler.jl to obtain a module containing
@@ -346,7 +460,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
346460 orc::ThreadSafeContext ctx;
347461 orc::ThreadSafeModule backing;
348462 if (!llvmmod) {
349- ctx = jl_ExecutionEngine->acquireContext ();
463+ ctx = jl_ExecutionEngine->makeContext ();
350464 backing = jl_create_ts_module (" text" , ctx);
351465 }
352466 orc::ThreadSafeModule &clone = llvmmod ? *unwrap (llvmmod) : backing;
@@ -367,11 +481,11 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
367481 params.imaging_mode = imaging;
368482 params.debug_level = cgparams->debug_info_level ;
369483 params.external_linkage = _external_linkage;
370- arraylist_new (&new_invokes, 0 );
371484 size_t compile_for[] = { jl_typeinf_world, _world };
372485 int worlds = 0 ;
373486 if (jl_options.trim != JL_TRIM_NO)
374487 worlds = 1 ;
488+ jl_compiled_functions_t compiled_functions;
375489 for (; worlds < 2 ; worlds++) {
376490 JL_TIMING (NATIVE_AOT, NATIVE_Codegen);
377491 size_t this_world = compile_for[worlds];
@@ -391,7 +505,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
391505 continue ;
392506 }
393507 mi = (jl_method_instance_t *)item;
394- compile_mi:
395508 src = NULL ;
396509 // if this method is generally visible to the current compilation world,
397510 // and this is either the primary world, or not applicable in the primary world
@@ -406,7 +519,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
406519 jl_ (mi);
407520 abort ();
408521 }
409- if (codeinst && !params. compiled_functions .count (codeinst) && !data->jl_fvar_map .count (codeinst)) {
522+ if (codeinst && !compiled_functions.count (codeinst) && !data->jl_fvar_map .count (codeinst)) {
410523 // now add it to our compilation results
411524 // Const returns do not do codegen, but juliac inspects codegen results so make a dummy fvar entry to represent it
412525 if (jl_options.trim != JL_TRIM_NO && jl_atomic_load_relaxed (&codeinst->invoke ) == jl_fptr_const_return_addr) {
@@ -418,7 +531,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
418531 Triple (clone.getModuleUnlocked ()->getTargetTriple ()));
419532 jl_llvm_functions_t decls = jl_emit_codeinst (result_m, codeinst, NULL , params);
420533 if (result_m)
421- params. compiled_functions [codeinst] = {std::move (result_m), std::move (decls)};
534+ compiled_functions[codeinst] = {std::move (result_m), std::move (decls)};
422535 else if (jl_options.trim != JL_TRIM_NO) {
423536 // if we're building a small image, we need to compile everything
424537 // to ensure that we have all the information we need.
@@ -428,26 +541,19 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
428541 }
429542 }
430543 }
431- } else if (this_world != jl_typeinf_world) {
544+ }
545+ else if (this_world != jl_typeinf_world) {
432546 /*
433547 jl_safe_printf("Codegen could not find requested codeinstance to be compiled\n");
434548 jl_(mi);
435549 abort();
436550 */
437551 }
438- // TODO: is goto the best way to do this?
439- jl_compile_workqueue (params, policy);
440- mi = (jl_method_instance_t *)arraylist_pop (&new_invokes);
441- if (mi != NULL ) {
442- goto compile_mi;
443- }
444552 }
445-
446- // finally, make sure all referenced methods also get compiled or fixed up
447- jl_compile_workqueue (params, policy);
448553 }
449554 JL_GC_POP ();
450- arraylist_free (&new_invokes);
555+ // finally, make sure all referenced methods also get compiled or fixed up
556+ compile_workqueue (params, policy, compiled_functions);
451557
452558 // process the globals array, before jl_merge_module destroys them
453559 SmallVector<std::string, 0 > gvars (params.global_targets .size ());
@@ -464,7 +570,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
464570 data->jl_value_to_llvm [idx] = global.first ;
465571 idx++;
466572 }
467- CreateNativeMethods += params. compiled_functions .size ();
573+ CreateNativeMethods += compiled_functions.size ();
468574
469575 size_t offset = gvars.size ();
470576 data->jl_external_to_llvm .resize (params.external_fns .size ());
@@ -489,7 +595,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
489595 {
490596 JL_TIMING (NATIVE_AOT, NATIVE_Merge);
491597 Linker L (*clone.getModuleUnlocked ());
492- for (auto &def : params. compiled_functions ) {
598+ for (auto &def : compiled_functions) {
493599 jl_merge_module (clone, std::move (std::get<0 >(def.second )));
494600 jl_code_instance_t *this_code = def.first ;
495601 jl_llvm_functions_t decls = std::get<1 >(def.second );
@@ -573,9 +679,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
573679 }
574680 ct->reentrant_timing &= ~1ull ;
575681 }
576- if (ctx.getContext ()) {
577- jl_ExecutionEngine->releaseContext (std::move (ctx));
578- }
579682 return (void *)data;
580683}
581684
@@ -1975,11 +2078,6 @@ void jl_dump_native_impl(void *native_code,
19752078 }
19762079}
19772080
1978- void addTargetPasses (legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis)
1979- {
1980- PM->add (new TargetLibraryInfoWrapperPass (triple));
1981- PM->add (createTargetTransformInfoWrapperPass (std::move (analysis)));
1982- }
19832081
19842082// sometimes in GDB you want to find out what code would be created from a mi
19852083extern " C" JL_DLLEXPORT_CODEGEN jl_code_info_t *jl_gdbdumpcode (jl_method_instance_t *mi)
@@ -2037,16 +2135,16 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, jl_
20372135 dump->F = nullptr ;
20382136 dump->TSM = nullptr ;
20392137 if (src && jl_is_code_info (src)) {
2040- auto ctx = jl_ExecutionEngine->getContext ();
2041- orc::ThreadSafeModule m = jl_create_ts_module (name_from_method_instance (mi), * ctx);
2138+ auto ctx = jl_ExecutionEngine->makeContext ();
2139+ orc::ThreadSafeModule m = jl_create_ts_module (name_from_method_instance (mi), ctx);
20422140 uint64_t compiler_start_time = 0 ;
20432141 uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed (&jl_measure_compile_time_enabled);
20442142 if (measure_compile_time_enabled)
20452143 compiler_start_time = jl_hrtime ();
20462144 auto target_info = m.withModuleDo ([&](Module &M) {
20472145 return std::make_pair (M.getDataLayout (), Triple (M.getTargetTriple ()));
20482146 });
2049- jl_codegen_params_t output (* ctx, std::move (target_info.first ), std::move (target_info.second ));
2147+ jl_codegen_params_t output (ctx, std::move (target_info.first ), std::move (target_info.second ));
20502148 output.params = ¶ms;
20512149 output.imaging_mode = imaging_default ();
20522150 // This would be nice, but currently it causes some assembly regressions that make printed output
0 commit comments