diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 279686c387e1b..a3ffdf1d051a9 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -295,12 +295,12 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_ jl_value_t *ci = cgparams.lookup(mi, world, world); JL_GC_PROMISE_ROOTED(ci); jl_code_instance_t *codeinst = NULL; - JL_GC_PUSH1(&codeinst); if (ci != jl_nothing && jl_atomic_load_relaxed(&((jl_code_instance_t *)ci)->inferred) != jl_nothing) { codeinst = (jl_code_instance_t*)ci; } else { if (cgparams.lookup != jl_rettype_inferred_addr) { + // XXX: This will corrupt and leak a lot of memory which may be very bad jl_error("Refusing to automatically run type inference with custom cache lookup."); } else { @@ -309,15 +309,129 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_ * it into the cache here, since it was explicitly requested and is * otherwise not reachable from anywhere in the system image. */ - if (!jl_mi_cache_has_ci(mi, codeinst)) + if (codeinst && !jl_mi_cache_has_ci(mi, codeinst)) { + JL_GC_PUSH1(&codeinst); jl_mi_cache_insert(mi, codeinst); + JL_GC_POP(); + } } } - JL_GC_POP(); return codeinst; } -arraylist_t new_invokes; +typedef DenseMap<jl_code_instance_t*, std::pair<orc::ThreadSafeModule, jl_llvm_functions_t>> jl_compiled_functions_t; +static void compile_workqueue(jl_codegen_params_t &params, CompilationPolicy policy, jl_compiled_functions_t &compiled_functions) +{ + decltype(params.workqueue) workqueue; + std::swap(params.workqueue, workqueue); + jl_code_info_t *src = NULL; + jl_code_instance_t *codeinst = NULL; + JL_GC_PUSH2(&src, &codeinst); + assert(!params.cache); + while (!workqueue.empty()) { + auto it = workqueue.pop_back_val(); + codeinst = it.first; + auto &proto = it.second; + // try to emit code for this item from the workqueue + StringRef invokeName = ""; + StringRef preal_decl = ""; + bool preal_specsig = false; + { + auto it = compiled_functions.find(codeinst); + if (it == compiled_functions.end()) { + // Reinfer the function. The JIT came along and removed the inferred + // method body.
See #34993 + if ((policy != CompilationPolicy::Default || params.params->trim) && + jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) { + // XXX: SOURCE_MODE_FORCE_SOURCE is wrong here (neither sufficient nor necessary) + codeinst = jl_type_infer(codeinst->def, jl_atomic_load_relaxed(&codeinst->max_world), SOURCE_MODE_FORCE_SOURCE); + } + if (codeinst) { + orc::ThreadSafeModule result_m = + jl_create_ts_module(name_from_method_instance(codeinst->def), + params.tsctx, params.DL, params.TargetTriple); + auto decls = jl_emit_codeinst(result_m, codeinst, NULL, params); + if (result_m) + it = compiled_functions.insert(std::make_pair(codeinst, std::make_pair(std::move(result_m), std::move(decls)))).first; + } + } + if (it != compiled_functions.end()) { + auto &decls = it->second.second; + invokeName = decls.functionObject; + if (decls.functionObject == "jl_fptr_args") { + preal_decl = decls.specFunctionObject; + } + else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call") { + preal_decl = decls.specFunctionObject; + preal_specsig = true; + } + } + } + // patch up the prototype we emitted earlier + Module *mod = proto.decl->getParent(); + assert(proto.decl->isDeclaration()); + Function *pinvoke = nullptr; + if (preal_decl.empty()) { + if (invokeName.empty() && params.params->trim) { + errs() << "Bailed out to invoke when compiling:"; + jl_(codeinst->def); + abort(); + } + pinvoke = emit_tojlinvoke(codeinst, invokeName, mod, params); + if (!proto.specsig) + proto.decl->replaceAllUsesWith(pinvoke); + } + if (proto.specsig && !preal_specsig) { + // get or build an fptr1 that can invoke codeinst + if (pinvoke == nullptr) + pinvoke = get_or_emit_fptr1(preal_decl, mod); + // emit specsig-to-(jl)invoke conversion + proto.decl->setLinkage(GlobalVariable::InternalLinkage); + //protodecl->setAlwaysInline(); + jl_init_function(proto.decl, params.TargetTriple); + jl_method_instance_t *mi = codeinst->def; + size_t nrealargs = jl_nparams(mi->specTypes); // number of actual arguments being passed + bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; + // TODO: maybe this can be cached in codeinst->specfptr? 
+ emit_specsig_to_fptr1(proto.decl, proto.cc, proto.return_roots, mi->specTypes, codeinst->rettype, is_opaque_closure, nrealargs, params, pinvoke, 0, 0); + preal_decl = ""; // no need to fixup the name + } + if (!preal_decl.empty()) { + // merge and/or rename this prototype to the real function + if (Value *specfun = mod->getNamedValue(preal_decl)) { + if (proto.decl != specfun) + proto.decl->replaceAllUsesWith(specfun); + } + else { + proto.decl->setName(preal_decl); + } + } + if (proto.oc) { // additionally, if we are dealing with an oc, then we might also need to fix up the fptr1 reference too + assert(proto.specsig); + StringRef ocinvokeDecl = invokeName; + // if OC expected a specialized specsig dispatch, but we don't have it, use the inner trampoline here too + // XXX: this invoke translation logic is supposed to exactly match new_opaque_closure + if (!preal_specsig || ocinvokeDecl == "jl_f_opaque_closure_call" || ocinvokeDecl == "jl_fptr_interpret_call" || ocinvokeDecl == "jl_fptr_const_return") + ocinvokeDecl = pinvoke->getName(); + assert(!ocinvokeDecl.empty()); + assert(ocinvokeDecl != "jl_fptr_args"); + assert(ocinvokeDecl != "jl_fptr_sparam"); + // merge and/or rename this prototype to the real function + if (Value *specfun = mod->getNamedValue(ocinvokeDecl)) { + if (proto.oc != specfun) + proto.oc->replaceAllUsesWith(specfun); + } + else { + proto.oc->setName(ocinvokeDecl); + } + } + workqueue.append(params.workqueue); + params.workqueue.clear(); + } + JL_GC_POP(); +} + + // takes the running content that has collected in the shadow module and dumps it to disk // this builds the object file portion of the sysimage files for fast startup, and can // also be used by external consumers like GPUCompiler.jl to obtain a module containing @@ -346,7 +460,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm orc::ThreadSafeContext ctx; orc::ThreadSafeModule backing; if (!llvmmod) { - ctx = jl_ExecutionEngine->acquireContext(); + ctx = jl_ExecutionEngine->makeContext(); backing = jl_create_ts_module("text", ctx); } orc::ThreadSafeModule &clone = llvmmod ?
*unwrap(llvmmod) : backing; @@ -367,11 +481,11 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm params.imaging_mode = imaging; params.debug_level = cgparams->debug_info_level; params.external_linkage = _external_linkage; - arraylist_new(&new_invokes, 0); size_t compile_for[] = { jl_typeinf_world, _world }; int worlds = 0; if (jl_options.trim != JL_TRIM_NO) worlds = 1; + jl_compiled_functions_t compiled_functions; for (; worlds < 2; worlds++) { JL_TIMING(NATIVE_AOT, NATIVE_Codegen); size_t this_world = compile_for[worlds]; @@ -391,7 +505,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm continue; } mi = (jl_method_instance_t*)item; -compile_mi: src = NULL; // if this method is generally visible to the current compilation world, // and this is either the primary world, or not applicable in the primary world @@ -406,7 +519,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm jl_(mi); abort(); } - if (codeinst && !params.compiled_functions.count(codeinst) && !data->jl_fvar_map.count(codeinst)) { + if (codeinst && !compiled_functions.count(codeinst) && !data->jl_fvar_map.count(codeinst)) { // now add it to our compilation results // Const returns do not do codegen, but juliac inspects codegen results so make a dummy fvar entry to represent it if (jl_options.trim != JL_TRIM_NO && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) { @@ -418,7 +531,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm Triple(clone.getModuleUnlocked()->getTargetTriple())); jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, NULL, params); if (result_m) - params.compiled_functions[codeinst] = {std::move(result_m), std::move(decls)}; + compiled_functions[codeinst] = {std::move(result_m), std::move(decls)}; else if (jl_options.trim != JL_TRIM_NO) { // if we're building a small image, we need to compile everything // to ensure that we have all the information we need. @@ -428,26 +541,19 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm } } } - } else if (this_world != jl_typeinf_world) { + } + else if (this_world != jl_typeinf_world) { /* jl_safe_printf("Codegen could not find requested codeinstance to be compiled\n"); jl_(mi); abort(); */ } - // TODO: is goto the best way to do this? 
- jl_compile_workqueue(params, policy); - mi = (jl_method_instance_t*)arraylist_pop(&new_invokes); - if (mi != NULL) { - goto compile_mi; - } } - - // finally, make sure all referenced methods also get compiled or fixed up - jl_compile_workqueue(params, policy); } JL_GC_POP(); - arraylist_free(&new_invokes); + // finally, make sure all referenced methods also get compiled or fixed up + compile_workqueue(params, policy, compiled_functions); // process the globals array, before jl_merge_module destroys them SmallVector<std::string, 0> gvars(params.global_targets.size()); @@ -464,7 +570,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm data->jl_value_to_llvm[idx] = global.first; idx++; } - CreateNativeMethods += params.compiled_functions.size(); + CreateNativeMethods += compiled_functions.size(); size_t offset = gvars.size(); data->jl_external_to_llvm.resize(params.external_fns.size()); @@ -489,7 +595,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm { JL_TIMING(NATIVE_AOT, NATIVE_Merge); Linker L(*clone.getModuleUnlocked()); - for (auto &def : params.compiled_functions) { + for (auto &def : compiled_functions) { jl_merge_module(clone, std::move(std::get<0>(def.second))); jl_code_instance_t *this_code = def.first; jl_llvm_functions_t decls = std::get<1>(def.second); @@ -573,9 +679,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm } ct->reentrant_timing &= ~1ull; } - if (ctx.getContext()) { - jl_ExecutionEngine->releaseContext(std::move(ctx)); - } return (void*)data; } @@ -1975,11 +2078,6 @@ void jl_dump_native_impl(void *native_code, } } -void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis) -{ - PM->add(new TargetLibraryInfoWrapperPass(triple)); - PM->add(createTargetTransformInfoWrapperPass(std::move(analysis))); -} // sometimes in GDB you want to find out what code would be created from a mi extern "C" JL_DLLEXPORT_CODEGEN jl_code_info_t *jl_gdbdumpcode(jl_method_instance_t *mi) @@ -2037,8 +2135,8 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, jl_ dump->F = nullptr; dump->TSM = nullptr; if (src && jl_is_code_info(src)) { - auto ctx = jl_ExecutionEngine->getContext(); - orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), *ctx); + auto ctx = jl_ExecutionEngine->makeContext(); + orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), ctx); uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) @@ -2046,7 +2144,7 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, jl_ auto target_info = m.withModuleDo([&](Module &M) { return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple())); }); - jl_codegen_params_t output(*ctx, std::move(target_info.first), std::move(target_info.second)); + jl_codegen_params_t output(ctx, std::move(target_info.first), std::move(target_info.second)); output.params = &params; output.imaging_mode = imaging_default(); // This would be nice, but currently it causes some assembly regressions that make printed output diff --git a/src/cgmemmgr.cpp b/src/cgmemmgr.cpp index 8557698a4e513..c257d2a2e3331 100644 --- a/src/cgmemmgr.cpp +++ b/src/cgmemmgr.cpp @@ -32,14 +32,14 @@ namespace { -static size_t get_block_size(size_t size) +static size_t get_block_size(size_t size) JL_NOTSAFEPOINT { return (size > jl_page_size * 256 ?
LLT_ALIGN(size, jl_page_size) : jl_page_size * 256); } // Wrapper function to mmap/munmap/mprotect pages... -static void *map_anon_page(size_t size) +static void *map_anon_page(size_t size) JL_NOTSAFEPOINT { #ifdef _OS_WINDOWS_ char *mem = (char*)VirtualAlloc(NULL, size + jl_page_size, @@ -54,7 +54,7 @@ static void *map_anon_page(size_t size) return mem; } -static void unmap_page(void *ptr, size_t size) +static void unmap_page(void *ptr, size_t size) JL_NOTSAFEPOINT { #ifdef _OS_WINDOWS_ VirtualFree(ptr, size, MEM_DECOMMIT); @@ -71,7 +71,7 @@ enum class Prot : int { NO = PAGE_NOACCESS }; -static void protect_page(void *ptr, size_t size, Prot flags) +static void protect_page(void *ptr, size_t size, Prot flags) JL_NOTSAFEPOINT { DWORD old_prot; if (!VirtualProtect(ptr, size, (DWORD)flags, &old_prot)) { @@ -89,7 +89,7 @@ enum class Prot : int { NO = PROT_NONE }; -static void protect_page(void *ptr, size_t size, Prot flags) +static void protect_page(void *ptr, size_t size, Prot flags) JL_NOTSAFEPOINT { int ret = mprotect(ptr, size, (int)flags); if (ret != 0) { @@ -98,7 +98,7 @@ static void protect_page(void *ptr, size_t size, Prot flags) } } -static bool check_fd_or_close(int fd) +static bool check_fd_or_close(int fd) JL_NOTSAFEPOINT { if (fd == -1) return false; @@ -129,7 +129,7 @@ static intptr_t anon_hdl = -1; // Also, creating big file mapping and then map pieces of it seems to // consume too much global resources. Therefore, we use each file mapping // as a block on windows -static void *create_shared_map(size_t size, size_t id) +static void *create_shared_map(size_t size, size_t id) JL_NOTSAFEPOINT { void *addr = MapViewOfFile((HANDLE)id, FILE_MAP_ALL_ACCESS, 0, 0, size); @@ -137,13 +137,13 @@ static void *create_shared_map(size_t size, size_t id) return addr; } -static intptr_t init_shared_map() +static intptr_t init_shared_map() JL_NOTSAFEPOINT { anon_hdl = 0; return 0; } -static void *alloc_shared_page(size_t size, size_t *id, bool exec) +static void *alloc_shared_page(size_t size, size_t *id, bool exec) JL_NOTSAFEPOINT { assert(size % jl_page_size == 0); DWORD file_mode = exec ? 
PAGE_EXECUTE_READWRITE : PAGE_READWRITE; @@ -162,7 +162,7 @@ static void *alloc_shared_page(size_t size, size_t *id, bool exec) } #else // _OS_WINDOWS_ // For shared mapped region -static intptr_t get_anon_hdl(void) +static intptr_t get_anon_hdl(void) JL_NOTSAFEPOINT { int fd = -1; @@ -228,7 +228,7 @@ static struct _make_shared_map_lock { }; } shared_map_lock; -static size_t get_map_size_inc() +static size_t get_map_size_inc() JL_NOTSAFEPOINT { rlimit rl; if (getrlimit(RLIMIT_FSIZE, &rl) != -1) { @@ -242,7 +242,7 @@ static size_t get_map_size_inc() return map_size_inc_default; } -static void *create_shared_map(size_t size, size_t id) +static void *create_shared_map(size_t size, size_t id) JL_NOTSAFEPOINT { void *addr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, anon_hdl, id); @@ -250,7 +250,7 @@ static void *create_shared_map(size_t size, size_t id) return addr; } -static intptr_t init_shared_map() +static intptr_t init_shared_map() JL_NOTSAFEPOINT { anon_hdl = get_anon_hdl(); if (anon_hdl == -1) @@ -265,7 +265,7 @@ static intptr_t init_shared_map() return anon_hdl; } -static void *alloc_shared_page(size_t size, size_t *id, bool exec) +static void *alloc_shared_page(size_t size, size_t *id, bool exec) JL_NOTSAFEPOINT { assert(size % jl_page_size == 0); size_t off = jl_atomic_fetch_add(&map_offset, size); @@ -292,7 +292,7 @@ static void *alloc_shared_page(size_t size, size_t *id, bool exec) #ifdef _OS_LINUX_ // Using `/proc/self/mem`, A.K.A. Keno's remote memory manager. -ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr) +ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr) JL_NOTSAFEPOINT { static_assert(sizeof(off_t) >= 8, "off_t is smaller than 64bits"); #ifdef _P64 @@ -319,7 +319,7 @@ ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr) // Do not call this directly. // Use `get_self_mem_fd` which has a guard to call this only once. 
-static int _init_self_mem() +static int _init_self_mem() JL_NOTSAFEPOINT { struct utsname kernel; uname(&kernel); @@ -359,13 +359,13 @@ static int _init_self_mem() return fd; } -static int get_self_mem_fd() +static int get_self_mem_fd() JL_NOTSAFEPOINT { static int fd = _init_self_mem(); return fd; } -static void write_self_mem(void *dest, void *ptr, size_t size) +static void write_self_mem(void *dest, void *ptr, size_t size) JL_NOTSAFEPOINT { while (size > 0) { ssize_t ret = pwrite_addr(get_self_mem_fd(), ptr, size, (uintptr_t)dest); @@ -424,7 +424,7 @@ struct Block { Block(const Block&) = delete; Block &operator=(const Block&) = delete; - Block(Block &&other) + Block(Block &&other) JL_NOTSAFEPOINT : ptr(other.ptr), total(other.total), avail(other.avail) @@ -433,9 +433,9 @@ struct Block { other.total = other.avail = 0; } - Block() = default; + Block() JL_NOTSAFEPOINT = default; - void *alloc(size_t size, size_t align) + void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT { size_t aligned_avail = avail & (-align); if (aligned_avail < size) @@ -444,7 +444,7 @@ struct Block { avail = aligned_avail - size; return p; } - void reset(void *addr, size_t size) + void reset(void *addr, size_t size) JL_NOTSAFEPOINT { if (avail >= jl_page_size) { uintptr_t end = uintptr_t(ptr) + total; @@ -462,7 +462,8 @@ class RWAllocator { static constexpr int nblocks = 8; Block blocks[nblocks]{}; public: - void *alloc(size_t size, size_t align) + RWAllocator() JL_NOTSAFEPOINT = default; + void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT { size_t min_size = (size_t)-1; int min_id = 0; @@ -498,9 +499,9 @@ struct SplitPtrBlock : public Block { uintptr_t wr_ptr{0}; uint32_t state{0}; - SplitPtrBlock() = default; + SplitPtrBlock() JL_NOTSAFEPOINT = default; - void swap(SplitPtrBlock &other) + void swap(SplitPtrBlock &other) JL_NOTSAFEPOINT { std::swap(ptr, other.ptr); std::swap(total, other.total); @@ -509,7 +510,7 @@ struct SplitPtrBlock : public Block { std::swap(state, other.state); } - SplitPtrBlock(SplitPtrBlock &&other) + SplitPtrBlock(SplitPtrBlock &&other) JL_NOTSAFEPOINT : SplitPtrBlock() { swap(other); @@ -534,11 +535,12 @@ class ROAllocator { // but might not have all the permissions set or data copied yet. SmallVector completed; virtual void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, - size_t size, size_t align) = 0; - virtual SplitPtrBlock alloc_block(size_t size) = 0; + size_t size, size_t align) JL_NOTSAFEPOINT = 0; + virtual SplitPtrBlock alloc_block(size_t size) JL_NOTSAFEPOINT = 0; public: - virtual ~ROAllocator() {} - virtual void finalize() + ROAllocator() JL_NOTSAFEPOINT = default; + virtual ~ROAllocator() JL_NOTSAFEPOINT {} + virtual void finalize() JL_NOTSAFEPOINT { for (auto &alloc: allocations) { // ensure the mapped pages are consistent @@ -552,7 +554,7 @@ class ROAllocator { } // Allocations that have not been finalized yet. 
SmallVector allocations; - void *alloc(size_t size, size_t align) + void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT { size_t min_size = (size_t)-1; int min_id = 0; @@ -603,7 +605,7 @@ class ROAllocator { template<bool exec> class DualMapAllocator : public ROAllocator<exec> { protected: - void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, size_t, size_t) override + void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, size_t, size_t) override JL_NOTSAFEPOINT { assert((char*)rt_ptr >= block.ptr && (char*)rt_ptr < (block.ptr + block.total)); @@ -618,7 +620,7 @@ class DualMapAllocator : public ROAllocator<exec> { } return (char*)rt_ptr + (block.wr_ptr - uintptr_t(block.ptr)); } - SplitPtrBlock alloc_block(size_t size) override + SplitPtrBlock alloc_block(size_t size) override JL_NOTSAFEPOINT { SplitPtrBlock new_block; // use `wr_ptr` to record the id initially @@ -626,7 +628,7 @@ class DualMapAllocator : public ROAllocator<exec> { new_block.reset(ptr, size); return new_block; } - void finalize_block(SplitPtrBlock &block, bool reset) + void finalize_block(SplitPtrBlock &block, bool reset) JL_NOTSAFEPOINT { // This function handles setting the block to the right mode // and free'ing maps that are not needed anymore. @@ -662,11 +664,11 @@ class DualMapAllocator : public ROAllocator<exec> { } } public: - DualMapAllocator() + DualMapAllocator() JL_NOTSAFEPOINT { assert(anon_hdl != -1); } - void finalize() override + void finalize() override JL_NOTSAFEPOINT { for (auto &block : this->blocks) { finalize_block(block, false); @@ -685,7 +687,7 @@ class SelfMemAllocator : public ROAllocator<false> { SmallVector<Block, 16> temp_buff; protected: void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, - size_t size, size_t align) override + size_t size, size_t align) override JL_NOTSAFEPOINT { assert(!(block.state & SplitPtrBlock::InitAlloc)); for (auto &wr_block: temp_buff) { @@ -699,13 +701,13 @@ class SelfMemAllocator : public ROAllocator<false> { new_block.reset(map_anon_page(block_size), block_size); return new_block.alloc(size, align); } - SplitPtrBlock alloc_block(size_t size) override + SplitPtrBlock alloc_block(size_t size) override JL_NOTSAFEPOINT { SplitPtrBlock new_block; new_block.reset(map_anon_page(size), size); return new_block; } - void finalize_block(SplitPtrBlock &block, bool reset) + void finalize_block(SplitPtrBlock &block, bool reset) JL_NOTSAFEPOINT { if (!(block.state & SplitPtrBlock::Alloc)) return; @@ -718,13 +720,13 @@ class SelfMemAllocator : public ROAllocator<false> { } } public: - SelfMemAllocator() + SelfMemAllocator() JL_NOTSAFEPOINT : ROAllocator<false>(), temp_buff() { assert(get_self_mem_fd() != -1); } - void finalize() override + void finalize() override JL_NOTSAFEPOINT { for (auto &block : this->blocks) { finalize_block(block, false); @@ -770,17 +772,15 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager { RWAllocator rw_alloc; std::unique_ptr<ROAllocator<false>> ro_alloc; std::unique_ptr<ROAllocator<true>> exe_alloc; - bool code_allocated; size_t total_allocated; public: - RTDyldMemoryManagerJL() + RTDyldMemoryManagerJL() JL_NOTSAFEPOINT : SectionMemoryManager(), pending_eh(), rw_alloc(), ro_alloc(), exe_alloc(), - code_allocated(false), total_allocated(0) { #ifdef _OS_LINUX_ @@ -794,12 +794,12 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager { exe_alloc.reset(new DualMapAllocator<true>()); } } - ~RTDyldMemoryManagerJL() override + ~RTDyldMemoryManagerJL() override JL_NOTSAFEPOINT { } - size_t getTotalBytes() { return total_allocated; } + size_t getTotalBytes() JL_NOTSAFEPOINT { return total_allocated; } void registerEHFrames(uint8_t *Addr, uint64_t
LoadAddr, - size_t Size) override; + size_t Size) override JL_NOTSAFEPOINT; #if 0 // Disable for now since we are not actually using this. void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, @@ -807,16 +807,16 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager { #endif uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, - StringRef SectionName) override; + StringRef SectionName) override JL_NOTSAFEPOINT; uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, StringRef SectionName, - bool isReadOnly) override; + bool isReadOnly) override JL_NOTSAFEPOINT; using SectionMemoryManager::notifyObjectLoaded; void notifyObjectLoaded(RuntimeDyld &Dyld, - const object::ObjectFile &Obj) override; - bool finalizeMemory(std::string *ErrMsg = nullptr) override; + const object::ObjectFile &Obj) override JL_NOTSAFEPOINT; + bool finalizeMemory(std::string *ErrMsg = nullptr) override JL_NOTSAFEPOINT; template <typename DL, typename Alloc> - void mapAddresses(DL &Dyld, Alloc &&allocator) + void mapAddresses(DL &Dyld, Alloc &&allocator) JL_NOTSAFEPOINT { for (auto &alloc: allocator->allocations) { if (alloc.rt_addr == alloc.wr_addr || alloc.relocated) @@ -826,7 +826,7 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager { } } template <typename DL> - void mapAddresses(DL &Dyld) + void mapAddresses(DL &Dyld) JL_NOTSAFEPOINT { if (!ro_alloc) return; @@ -838,14 +838,9 @@ uint8_t *RTDyldMemoryManagerJL::allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, - StringRef SectionName) + StringRef SectionName) JL_NOTSAFEPOINT { // allocating more than one code section can confuse libunwind. -#if !defined(_COMPILER_MSAN_ENABLED_) && !defined(_COMPILER_ASAN_ENABLED_) - // TODO: Figure out why msan and now asan too need this.
- assert(!code_allocated); - code_allocated = true; -#endif total_allocated += Size; jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size); jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, Size); @@ -859,7 +854,7 @@ uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, StringRef SectionName, - bool isReadOnly) + bool isReadOnly) JL_NOTSAFEPOINT { total_allocated += Size; jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size); @@ -873,7 +868,7 @@ uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size, } void RTDyldMemoryManagerJL::notifyObjectLoaded(RuntimeDyld &Dyld, - const object::ObjectFile &Obj) + const object::ObjectFile &Obj) JL_NOTSAFEPOINT { if (!ro_alloc) { assert(!exe_alloc); @@ -884,9 +879,8 @@ void RTDyldMemoryManagerJL::notifyObjectLoaded(RuntimeDyld &Dyld, mapAddresses(Dyld); } -bool RTDyldMemoryManagerJL::finalizeMemory(std::string *ErrMsg) +bool RTDyldMemoryManagerJL::finalizeMemory(std::string *ErrMsg) JL_NOTSAFEPOINT { - code_allocated = false; if (ro_alloc) { ro_alloc->finalize(); assert(exe_alloc); @@ -904,7 +898,7 @@ bool RTDyldMemoryManagerJL::finalizeMemory(std::string *ErrMsg) void RTDyldMemoryManagerJL::registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) + size_t Size) JL_NOTSAFEPOINT { if (uintptr_t(Addr) == LoadAddr) { register_eh_frames(Addr, Size); @@ -917,7 +911,7 @@ void RTDyldMemoryManagerJL::registerEHFrames(uint8_t *Addr, #if 0 void RTDyldMemoryManagerJL::deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) + size_t Size) JL_NOTSAFEPOINT { deregister_eh_frames((uint8_t*)LoadAddr, Size); } @@ -925,12 +919,12 @@ void RTDyldMemoryManagerJL::deregisterEHFrames(uint8_t *Addr, } -RTDyldMemoryManager* createRTDyldMemoryManager() +RTDyldMemoryManager* createRTDyldMemoryManager() JL_NOTSAFEPOINT { return new RTDyldMemoryManagerJL(); } -size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) +size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT { return ((RTDyldMemoryManagerJL*)mm)->getTotalBytes(); } diff --git a/src/clangsa/GCChecker.cpp b/src/clangsa/GCChecker.cpp index 31631eb70a4ad..4892ebdabd110 100644 --- a/src/clangsa/GCChecker.cpp +++ b/src/clangsa/GCChecker.cpp @@ -31,7 +31,7 @@ namespace { using namespace clang; using namespace ento; -#define PDP std::shared_ptr<PathDiagnosticPiece> +typedef std::shared_ptr<PathDiagnosticPiece> PDP; #define MakePDP make_unique<PathDiagnosticEventPiece> static const Stmt *getStmtForDiagnostics(const ExplodedNode *N) @@ -394,13 +394,18 @@ PDP GCChecker::SafepointBugVisitor::VisitNode(const ExplodedNode *N, } else { PathDiagnosticLocation Pos = PathDiagnosticLocation::createDeclBegin( N->getLocationContext(), BRC.getSourceManager()); - return MakePDP(Pos, "Tracking JL_NOT_SAFEPOINT annotation here."); + if (Pos.isValid()) + return MakePDP(Pos, "Tracking JL_NOT_SAFEPOINT annotation here."); + //N->getLocation().dump(); } } else if (NewSafepointDisabled == (unsigned)-1) { PathDiagnosticLocation Pos = PathDiagnosticLocation::createDeclBegin( N->getLocationContext(), BRC.getSourceManager()); - return MakePDP(Pos, "Safepoints re-enabled here"); + if (Pos.isValid()) + return MakePDP(Pos, "Safepoints re-enabled here"); + //N->getLocation().dump(); } + // n.b.
there may be no position here to report if they were disabled by julia_notsafepoint_enter/leave } return nullptr; } diff --git a/src/codegen.cpp b/src/codegen.cpp index 3f69f4789493a..0ab26a65fcaaa 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -233,7 +233,6 @@ STATISTIC(EmittedSpecfunCalls, "Number of specialized calls emitted"); STATISTIC(EmittedInvokes, "Number of invokes emitted"); STATISTIC(EmittedCalls, "Number of calls emitted"); STATISTIC(EmittedUndefVarErrors, "Number of undef var errors emitted"); -STATISTIC(EmittedOpaqueClosureFunctions, "Number of opaque closures emitted"); STATISTIC(EmittedToJLInvokes, "Number of tojlinvoke calls emitted"); STATISTIC(EmittedCFuncInvalidates, "Number of C function invalidates emitted"); STATISTIC(GeneratedCFuncWrappers, "Number of C function wrappers generated"); @@ -1009,6 +1008,11 @@ static const auto jlinvoke_func = new JuliaFunction<>{ {AttributeSet(), Attributes(C, {Attribute::ReadOnly, Attribute::NoCapture})}); }, }; +static const auto jlopaque_closure_call_func = new JuliaFunction<>{ + XSTR(jl_f_opaque_closure_call), + get_func_sig, + get_func_attrs, +}; static const auto jlmethod_func = new JuliaFunction<>{ XSTR(jl_method_def), [](LLVMContext &C) { @@ -1606,7 +1610,7 @@ static const auto jltuple_func = new JuliaFunction<>{XSTR(jl_f_tuple), get_func_ static const auto jlintrinsic_func = new JuliaFunction<>{XSTR(jl_f_intrinsic_call), get_func3_sig, get_func_attrs}; static const auto &builtin_func_map() { - static std::map<jl_fptr_args_t, JuliaFunction<>*> builtins = { + static auto builtins = new DenseMap<jl_fptr_args_t, JuliaFunction<>*> { { jl_f_is_addr, new JuliaFunction<>{XSTR(jl_f_is), get_func_sig, get_func_attrs} }, { jl_f_typeof_addr, new JuliaFunction<>{XSTR(jl_f_typeof), get_func_sig, get_func_attrs} }, { jl_f_sizeof_addr, new JuliaFunction<>{XSTR(jl_f_sizeof), get_func_sig, get_func_attrs} }, @@ -1649,18 +1653,18 @@ static const auto &builtin_func_map() { { jl_f__svec_ref_addr, new JuliaFunction<>{XSTR(jl_f__svec_ref), get_func_sig, get_func_attrs} }, { jl_f_current_scope_addr, new JuliaFunction<>{XSTR(jl_f_current_scope), get_func_sig, get_func_attrs} }, }; - return builtins; + return *builtins; } static const auto &may_dispatch_builtins() { - static std::unordered_set<jl_fptr_args_t> builtins( + static auto builtins = new DenseSet<jl_fptr_args_t>( {jl_f__apply_iterate_addr, jl_f__apply_pure_addr, jl_f__call_in_world_addr, jl_f__call_in_world_total_addr, jl_f__call_latest_addr, }); - return builtins; + return *builtins; } static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction<>{XSTR(jl_new_opaque_closure_jlcall), get_func_sig, get_func_attrs}; @@ -2976,7 +2980,7 @@ static void jl_name_jlfuncparams_args(jl_codegen_params_t &params, Function *F) F->getArg(3)->setName("sparams::Any"); } -static void jl_init_function(Function *F, const Triple &TT) +void jl_init_function(Function *F, const Triple &TT) { // set any attributes that *must* be set on all functions AttrBuilder attr(F->getContext()); @@ -3023,6 +3027,7 @@ static bool uses_specsig(jl_value_t *sig, bool needsparams, jl_value_t *rettype, if (jl_vararg_kind(jl_tparam(sig, jl_nparams(sig) - 1)) == JL_VARARG_UNBOUND) return false; // not invalid, consider if specialized signature is worthwhile + // n.b.
sig is sometimes wrong for OC (tparam0 might be the captures type of the specialization, even though what gets passed in that slot is an OC object), so prefer_specsig is always set (instead of recomputing tparam0 using get_oc_type) if (prefer_specsig) return true; if (!deserves_retbox(rettype) && !jl_is_datatype_singleton((jl_datatype_t*)rettype) && rettype != (jl_value_t*)jl_bool_type) @@ -5236,7 +5241,15 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *t if (theF) theArgs.push_back(theF); for (size_t i = 0; i < nargs; i++) { - Value *arg = boxed(ctx, argv[i]); + Value *arg; + if (i == 0 && trampoline == julia_call3) { + const jl_cgval_t &f = argv[i]; + arg = f.inline_roots.empty() && f.ispointer() ? data_pointer(ctx, f) : value_to_pointer(ctx, f).V; + arg = decay_derived(ctx, arg); + } + else { + arg = boxed(ctx, argv[i]); + } theArgs.push_back(arg); } CallInst *result = ctx.builder.CreateCall(TheTrampoline, theArgs); @@ -5283,13 +5296,13 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos idx++; } for (size_t i = 0; i < nargs; i++) { - jl_value_t *jt = jl_nth_slot_type(specTypes, i); // n.b.: specTypes is required to be a datatype by construction for specsig if (is_opaque_closure && i == 0) { // Special implementation for opaque closures: their jt and thus - // julia_type_to_llvm values are likely wrong, so override the - // behavior here to directly pass the expected pointer based instead - // just on passing arg as a pointer + // julia_type_to_llvm values are likely wrong (based on captures instead of the OC), so override the + // behavior here to pass the expected pointer directly instead of + // computing it from the available information + // jl_value_t *oc_type = (jl_value_t*)jl_any_type; // more accurately: get_oc_type(specTypes, jlretty) jl_cgval_t arg = argv[i]; if (arg.isghost) { argvals[idx] = Constant::getNullValue(ctx.builder.getPtrTy(AddressSpace::Derived)); @@ -5302,6 +5315,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos idx++; continue; } + jl_value_t *jt = jl_nth_slot_type(specTypes, i); jl_cgval_t arg = update_julia_type(ctx, argv[i], jt); if (arg.typ == jl_bottom_type) return jl_cgval_t(); @@ -5519,6 +5533,7 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayR // Check if we already queued this up auto it = ctx.call_targets.find(codeinst); if (need_to_emit && it != ctx.call_targets.end()) { + assert(it->second.specsig == specsig); protoname = it->second.decl->getName(); need_to_emit = cache_valid = false; } @@ -5559,7 +5574,7 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayR handled = true; if (need_to_emit) { Function *trampoline_decl = cast<Function>(jl_Module->getNamedValue(protoname)); - ctx.call_targets[codeinst] = {cc, return_roots, trampoline_decl, specsig}; + ctx.call_targets[codeinst] = {cc, return_roots, trampoline_decl, nullptr, specsig}; if (trim_may_error(ctx.params->trim)) push_frames(ctx, ctx.linfo, mi); } @@ -5570,9 +5585,9 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayR if (!handled) { if (trim_may_error(ctx.params->trim)) { if (lival.constant) { - arraylist_push(&new_invokes, lival.constant); push_frames(ctx, ctx.linfo, (jl_method_instance_t*)lival.constant); - } else { + } + else { errs() << "Dynamic call to unknown function"; errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" <<
ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; @@ -5728,20 +5743,16 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo } } FunctionCallee fptr; - Value *F; JuliaFunction<> *cc; if (f.typ == (jl_value_t*)jl_intrinsic_type) { fptr = prepare_call(jlintrinsic_func); - F = f.inline_roots.empty() && f.ispointer() ? data_pointer(ctx, f) : value_to_pointer(ctx, f).V; - F = decay_derived(ctx, F); cc = julia_call3; } else { fptr = FunctionCallee(get_func_sig(ctx.builder.getContext()), ctx.builder.CreateCall(prepare_call(jlgetbuiltinfptr_func), {emit_typeof(ctx, f)})); - F = boxed(ctx, f); cc = julia_call; } - Value *ret = emit_jlcall(ctx, fptr, F, ArrayRef(argv).drop_front(), nargs - 1, cc); + Value *ret = emit_jlcall(ctx, fptr, nullptr, argv, nargs, cc); setName(ctx.emission_context, ret, "Builtin_ret"); return mark_julia_type(ctx, ret, true, rt); } @@ -5758,52 +5769,12 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo JL_GC_POP(); return r; } + // TODO: else emit_oc_call } } int failed_dispatch = !argv[0].constant; if (ctx.params->trim != JL_TRIM_NO) { - size_t min_valid = 1; - size_t max_valid = ~(size_t)0; - size_t latest_world = jl_get_world_counter(); // TODO: marshal the world age of the compilation here. - - // Find all methods matching the call signature - jl_array_t *matches = NULL; - jl_value_t *tup = NULL; - JL_GC_PUSH2(&tup, &matches); - if (!failed_dispatch) { - SmallVector<jl_value_t*> argtypes; - for (auto& arg: argv) - argtypes.push_back(arg.typ); - tup = jl_apply_tuple_type_v(argtypes.data(), argtypes.size()); - matches = (jl_array_t*)jl_matching_methods((jl_tupletype_t*)tup, jl_nothing, 10 /*TODO: make global*/, 1, - latest_world, &min_valid, &max_valid, NULL); - if ((jl_value_t*)matches == jl_nothing) - failed_dispatch = 1; - } - - // Expand each matching method to its unique specialization, if it has exactly one - if (!failed_dispatch) { - size_t k; - size_t len = new_invokes.len; - for (k = 0; k < jl_array_nrows(matches); k++) { - jl_method_match_t *match = (jl_method_match_t *)jl_array_ptr_ref(matches, k); - jl_method_instance_t *mi = jl_method_match_to_mi(match, latest_world, min_valid, max_valid, 0); - if (!mi) { - if (jl_array_nrows(matches) == 1) { - // if the method match is not compileable, but there is only one, fall back to - // unspecialized implementation - mi = jl_get_unspecialized(match->method); - } - else { - new_invokes.len = len; - failed_dispatch = 1; - break; - } - } - arraylist_push(&new_invokes, mi); - } - } - JL_GC_POP(); + abort(); // this code path is unsound, unsafe, and probably bad } if (failed_dispatch && trim_may_error(ctx.params->trim)) { @@ -6634,66 +6605,73 @@ static std::pair<Function*, Function*> get_oc_function(jl_codectx_t &ctx, jl_met assert(jl_is_method_instance(mi)); ci = jl_atomic_load_relaxed(&mi->cache); } - - if (ci == NULL || (jl_value_t*)ci == jl_nothing) { - JL_GC_POP(); - return std::make_pair((Function*)NULL, (Function*)NULL); - } - auto inferred = jl_atomic_load_relaxed(&ci->inferred); - if (!inferred || inferred == jl_nothing) { + if (ci == NULL || (jl_value_t*)ci == jl_nothing || ci->rettype != rettype || !jl_egal(sigtype, mi->specTypes)) { // TODO: correctly handle the ABI conversion if rettype != ci->rettype JL_GC_POP(); return std::make_pair((Function*)NULL, (Function*)NULL); } - auto it = ctx.emission_context.compiled_functions.find(ci); - - if (it == ctx.emission_context.compiled_functions.end()) { - ++EmittedOpaqueClosureFunctions; - jl_code_info_t *ir =
jl_uncompress_ir(closure_method, ci, (jl_value_t*)inferred); - JL_GC_PUSH1(&ir); - // TODO: Emit this inline and outline it late using LLVM's coroutine support. - orc::ThreadSafeModule closure_m = jl_create_ts_module( - name_from_method_instance(mi), ctx.emission_context.tsctx, - jl_Module->getDataLayout(), Triple(jl_Module->getTargetTriple())); - jl_llvm_functions_t closure_decls = emit_function(closure_m, mi, ir, rettype, ctx.emission_context); - JL_GC_POP(); - it = ctx.emission_context.compiled_functions.insert(std::make_pair(ci, std::make_pair(std::move(closure_m), std::move(closure_decls)))).first; + // method lookup code (similar to emit_invoke, and the inverse of emit_specsig_oc_call) + bool specsig = uses_specsig(sigtype, false, rettype, true); + std::string name; + std::string oc; + StringRef protoname; + StringRef proto_oc; + + // Check if we already queued this up + auto it = ctx.call_targets.find(ci); + bool need_to_emit = it == ctx.call_targets.end(); + if (!need_to_emit) { + assert(specsig == it->second.specsig); + if (specsig) { + protoname = it->second.decl->getName(); + proto_oc = it->second.oc->getName(); + } + else { + proto_oc = it->second.decl->getName(); + } + need_to_emit = false; + } + else { + if (specsig) { + raw_string_ostream(name) << "j_" << name_from_method_instance(mi) << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); + protoname = StringRef(name); + } + raw_string_ostream(oc) << "j1_" << name_from_method_instance(mi) << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); + proto_oc = StringRef(oc); } - auto &closure_m = it->second.first; - auto &closure_decls = it->second.second; - - assert(closure_decls.functionObject != "jl_fptr_sparam"); - bool isspecsig = closure_decls.functionObject != "jl_fptr_args"; - - Function *F = NULL; - std::string fname = isspecsig ? 
- closure_decls.functionObject : - closure_decls.specFunctionObject; - if (GlobalValue *V = jl_Module->getNamedValue(fname)) { + // Get the fptr1 OC + Function *F = nullptr; + if (GlobalValue *V = jl_Module->getNamedValue(proto_oc)) { F = cast<Function>(V); } else { F = Function::Create(get_func_sig(ctx.builder.getContext()), Function::ExternalLinkage, - fname, jl_Module); + proto_oc, jl_Module); jl_init_function(F, ctx.emission_context.TargetTriple); jl_name_jlfunc_args(ctx.emission_context, F); F->setAttributes(AttributeList::get(ctx.builder.getContext(), {get_func_attrs(ctx.builder.getContext()), F->getAttributes()})); } - Function *specF = NULL; - if (!isspecsig) { - specF = F; + + // Get the specsig (if applicable) + Function *specF = nullptr; + jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed; + unsigned return_roots = 0; + bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; + assert(is_opaque_closure); + if (specsig) { + bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); + jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, nullptr, protoname, mi->specTypes, rettype, is_opaque_closure, gcstack_arg); + cc = returninfo.cc; + return_roots = returninfo.return_roots; + specF = cast<Function>(returninfo.decl.getCallee()); } - else { - //emission context holds context lock so can get module - specF = closure_m.getModuleUnlocked()->getFunction(closure_decls.specFunctionObject); - if (specF) { - jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, NULL, - closure_decls.specFunctionObject, sigtype, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg)); - specF = cast<Function>(returninfo.decl.getCallee()); - } + + if (need_to_emit) { + ctx.call_targets[ci] = {cc, return_roots, specsig ? specF : F, specsig ? F : nullptr, specsig}; } + JL_GC_POP(); return std::make_pair(F, specF); } @@ -7173,7 +7151,12 @@ static Value *get_scope_field(jl_codectx_t &ctx) return emit_ptrgep(ctx, ct, offsetof(jl_task_t, scope), "current_scope"); } -static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, Module *M, jl_codegen_params_t &params) +Function *get_or_emit_fptr1(StringRef preal_decl, Module *M) +{ + return cast<Function>(M->getOrInsertFunction(preal_decl, get_func_sig(M->getContext()), get_func_attrs(M->getContext())).getCallee()); +} + +Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, Module *M, jl_codegen_params_t &params) JL_NOTSAFEPOINT { ++EmittedToJLInvokes; jl_codectx_t ctx(M->getContext(), params, codeinst); @@ -7184,7 +7167,6 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptr name, M); jl_init_function(f, params.TargetTriple); if (trim_may_error(params.params->trim)) { - arraylist_push(&new_invokes, codeinst->def); // Try t compile this invoke // TODO: Debuginfo!
push_frames(ctx, ctx.linfo, codeinst->def, 1); } @@ -7213,7 +7195,17 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptr return f; } -static void emit_cfunc_invalidate( +static jl_value_t *get_oc_type(jl_value_t *calltype, jl_value_t *rettype) JL_ALWAYS_LEAFTYPE +{ + jl_value_t *argtype = jl_argtype_without_function((jl_value_t*)calltype); + JL_GC_PUSH1(&argtype); + jl_value_t *oc_type JL_ALWAYS_LEAFTYPE = jl_apply_type2((jl_value_t*)jl_opaque_closure_type, argtype, rettype); + JL_GC_PROMISE_ROOTED(oc_type); + JL_GC_POP(); + return oc_type; +} + +void emit_specsig_to_fptr1( Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots, jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure, size_t nargs, @@ -7240,14 +7232,18 @@ ++AI; // gcstack_arg } for (size_t i = 0; i < nargs; i++) { - // n.b. calltype is required to be a datatype by construction for specsig - jl_value_t *jt = jl_nth_slot_type(calltype, i); if (i == 0 && is_for_opaque_closure) { + // `jt` would be wrong here (it is the captures type), so is not used for + // the ABI decisions, but the argument actually will require boxing as its real type + // which can be exactly recomputed from the specialization, as that defined the ABI + jl_value_t *oc_type = get_oc_type(calltype, rettype); Value *arg_v = &*AI; ++AI; - myargs[i] = mark_julia_slot(arg_v, jt, NULL, ctx.tbaa().tbaa_const); + myargs[i] = mark_julia_slot(arg_v, (jl_value_t*)oc_type, NULL, ctx.tbaa().tbaa_const); continue; } + // n.b. calltype is required to be a datatype by construction for specsig + jl_value_t *jt = jl_nth_slot_type(calltype, i); bool isboxed = false; Type *et; if (deserves_argbox(jt)) { @@ -7335,16 +7331,6 @@ } } -static void emit_cfunc_invalidate( - Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots, - jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure, - size_t nargs, jl_codegen_params_t &params, - size_t min_world, size_t max_world) -{ - emit_cfunc_invalidate(gf_thunk, cc, return_roots, calltype, rettype, is_for_opaque_closure, nargs, params, - prepare_call_in(gf_thunk->getParent(), jlapplygeneric_func), min_world, max_world); -} - static Function* gen_cfun_wrapper( Module *into, jl_codegen_params_t &params, const function_sig_t &sig, jl_value_t *ff, const char *aliasname, @@ -7712,11 +7698,11 @@ static Function* gen_cfun_wrapper( GlobalVariable::InternalLinkage, funcName, M); jl_init_function(gf_thunk, ctx.emission_context.TargetTriple); gf_thunk->setAttributes(AttributeList::get(M->getContext(), {returninfo.attrs, gf_thunk->getAttributes()})); - // build a specsig -> jl_apply_generic converter thunk + // build a specsig -> jl_apply_generic converter thunk // this builds a method that calls jl_apply_generic (as a closure over a singleton function pointer), // but which has the signature of a specsig - emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, is_opaque_closure, nargs + 1, ctx.emission_context, - min_world, max_world); + emit_specsig_to_fptr1(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, is_opaque_closure, nargs + 1, ctx.emission_context, + prepare_call_in(gf_thunk->getParent(), jlapplygeneric_func), min_world, max_world); returninfo.decl = FunctionCallee(returninfo.decl.getFunctionType(), ctx.builder.CreateSelect(age_ok, returninfo.decl.getCallee(), gf_thunk)); } retval =
emit_call_specfun_other(ctx, is_opaque_closure, lam->specTypes, codeinst->rettype, returninfo, nullptr, inputargs, nargs + 1); @@ -8026,7 +8012,8 @@ const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysi } // generate a julia-callable function that calls f (AKA lam) -static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, jl_returninfo_t &f, unsigned nargs, int retarg, StringRef funcName, +// if is_opaque_closure, then generate the OC invoke, rather than a real invoke +static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, jl_returninfo_t &f, unsigned nargs, int retarg, bool is_opaque_closure, StringRef funcName, Module *M, jl_codegen_params_t &params) { ++GeneratedInvokeWrappers; @@ -8055,11 +8042,14 @@ static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, j allocate_gc_frame(ctx, b0); SmallVector<jl_cgval_t> argv(nargs); - bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure; jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); for (size_t i = 0; i < nargs; ++i) { - jl_value_t *ty = ((i == 0) && is_opaque_closure) ? (jl_value_t*)jl_any_type : - jl_nth_slot_type(lam->specTypes, i); + if (i == 0 && is_opaque_closure) { + jl_value_t *oc_type = (jl_value_t*)jl_any_type; // more accurately: get_oc_type(lam->specTypes, jlretty) + argv[i] = mark_julia_slot(funcArg, oc_type, NULL, ctx.tbaa().tbaa_const); + continue; + } + jl_value_t *ty = jl_nth_slot_type(lam->specTypes, i); Value *theArg; if (i == 0) { theArg = funcArg; @@ -8455,6 +8445,7 @@ static jl_llvm_functions_t // specTypes is required to be a datatype by construction for specsig, but maybe not otherwise // OpaqueClosure implicitly loads the env if (i == 0 && ctx.is_opaque_closure) { + // n.b. this is not really needed, because ty was already supposed to be correct if (jl_is_array(src->slottypes)) { ty = jl_array_ptr_ref((jl_array_t*)src->slottypes, i); } @@ -8554,7 +8545,7 @@ static jl_llvm_functions_t raw_string_ostream(wrapName) << "jfptr_" << ctx.name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); declarations.functionObject = wrapName; size_t nparams = jl_nparams(lam->specTypes); - gen_invoke_wrapper(lam, jlrettype, returninfo, nparams, retarg, declarations.functionObject, M, ctx.emission_context); + gen_invoke_wrapper(lam, jlrettype, returninfo, nparams, retarg, ctx.is_opaque_closure, declarations.functionObject, M, ctx.emission_context); // TODO: add attributes: maybe_mark_argument_dereferenceable(Arg, argType) // TODO: add attributes: dereferenceable // TODO: (if needsparams) add attributes: dereferenceable, readonly, nocapture @@ -8564,11 +8555,10 @@ static jl_llvm_functions_t GlobalVariable::ExternalLinkage, declarations.specFunctionObject, M); jl_init_function(f, ctx.emission_context.TargetTriple); - if (needsparams) { + if (needsparams) jl_name_jlfuncparams_args(ctx.emission_context, f); - } else { + else jl_name_jlfunc_args(ctx.emission_context, f); - } f->setAttributes(AttributeList::get(ctx.builder.getContext(), {get_func_attrs(ctx.builder.getContext()), f->getAttributes()})); returninfo.decl = f; declarations.functionObject = needsparams ? "jl_fptr_sparam" : "jl_fptr_args"; @@ -8940,76 +8930,73 @@ static jl_llvm_functions_t } for (i = 0; i < nreq && i < vinfoslen; i++) { jl_sym_t *s = slot_symbol(ctx, i); - jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i); - // TODO: jl_nth_slot_type should call jl_rewrap_unionall?
- // specTypes is required to be a datatype by construction for specsig, but maybe not otherwise - bool isboxed = deserves_argbox(argType); - Type *llvmArgType = NULL; - if (i == 0 && ctx.is_opaque_closure) { - isboxed = false; - llvmArgType = ctx.builder.getPtrTy(AddressSpace::Derived); - argType = (jl_value_t*)jl_any_type; - } - else { - llvmArgType = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType); - } jl_varinfo_t &vi = ctx.slots[i]; - if (s == jl_unused_sym || vi.value.constant) { - assert(vi.boxroot == NULL); - if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType)) { - ++AI; - auto tracked = CountTrackedPointers(llvmArgType); - if (tracked.count && !tracked.all) - ++AI; - } - continue; - } jl_cgval_t theArg; - // If this is an opaque closure, implicitly load the env and switch - // the world age. if (i == 0 && ctx.is_opaque_closure) { + // If this is an opaque closure, implicitly load the env and switch + // the world age. The specTypes value is wrong for this field, so + // this needs to be handled first. + // jl_value_t *oc_type = get_oc_type(calltype, rettype); + Value *oc_this = decay_derived(ctx, &*AI); + ++AI; // both specsig (derived) and fptr1 (box) pass this argument as a distinct argument // Load closure world - Value *oc_this = decay_derived(ctx, &*AI++); - Value *argaddr = oc_this; - Value *worldaddr = emit_ptrgep(ctx, argaddr, offsetof(jl_opaque_closure_t, world)); - + Value *worldaddr = emit_ptrgep(ctx, oc_this, offsetof(jl_opaque_closure_t, world)); jl_cgval_t closure_world = typed_load(ctx, worldaddr, NULL, (jl_value_t*)jl_long_type, nullptr, nullptr, false, AtomicOrdering::NotAtomic, false, ctx.types().alignof_ptr.value()); ctx.world_age_at_entry = closure_world.V; // The tls world in a OC is the world of the closure emit_unbox_store(ctx, closure_world, world_age_field, ctx.tbaa().tbaa_gcframe, ctx.types().alignof_ptr); - // Load closure env - Value *envaddr = emit_ptrgep(ctx, argaddr, offsetof(jl_opaque_closure_t, captures)); + if (s == jl_unused_sym || vi.value.constant) + continue; - jl_cgval_t closure_env = typed_load(ctx, envaddr, NULL, (jl_value_t*)jl_any_type, - nullptr, nullptr, true, AtomicOrdering::NotAtomic, false, sizeof(void*)); - theArg = update_julia_type(ctx, closure_env, vi.value.typ); - } - else if (specsig) { - theArg = get_specsig_arg(argType, llvmArgType, isboxed); + // Load closure env, which is always a boxed value (usually some Tuple) currently + Value *envaddr = emit_ptrgep(ctx, oc_this, offsetof(jl_opaque_closure_t, captures)); + theArg = typed_load(ctx, envaddr, NULL, (jl_value_t*)vi.value.typ, + nullptr, nullptr, /*isboxed*/true, AtomicOrdering::NotAtomic, false, sizeof(void*)); } else { - if (i == 0) { - // first (function) arg is separate in jlcall - theArg = mark_julia_type(ctx, fArg, true, vi.value.typ); + jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i); + // TODO: jl_nth_slot_type should call jl_rewrap_unionall? + // specTypes is required to be a datatype by construction for specsig, but maybe not otherwise + bool isboxed = deserves_argbox(argType); + Type *llvmArgType = NULL; + llvmArgType = isboxed ? 
ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType); + if (s == jl_unused_sym || vi.value.constant) { + assert(vi.boxroot == NULL); + if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType)) { + ++AI; + auto tracked = CountTrackedPointers(llvmArgType); + if (tracked.count && !tracked.all) + ++AI; + } + continue; + } + if (specsig) { + theArg = get_specsig_arg(argType, llvmArgType, isboxed); } else { - if (i == 0) { - // first (function) arg is separate in jlcall - theArg = mark_julia_type(ctx, fArg, true, vi.value.typ); + if (i == 0) { + // first (function) arg is separate in jlcall + theArg = mark_julia_type(ctx, fArg, true, vi.value.typ); + } + else { + Value *argPtr = emit_ptrgep(ctx, argArray, (i - 1) * ctx.types().sizeof_ptr); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + Value *load = ai.decorateInst(maybe_mark_load_dereferenceable( + ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))), + false, vi.value.typ)); + theArg = mark_julia_type(ctx, load, true, vi.value.typ); + if (debug_enabled && vi.dinfo && !vi.boxroot) { + SmallVector<uint64_t, 8> addr; addr.push_back(llvm::dwarf::DW_OP_deref); - addr.push_back(llvm::dwarf::DW_OP_plus_uconst); - addr.push_back((i - 1) * sizeof(void*)); - if ((Metadata*)vi.dinfo->getType() != debugcache.jl_pvalue_dillvmt) + addr.push_back(llvm::dwarf::DW_OP_plus_uconst); + addr.push_back((i - 1) * sizeof(void*)); + if ((Metadata*)vi.dinfo->getType() != debugcache.jl_pvalue_dillvmt) + addr.push_back(llvm::dwarf::DW_OP_deref); + dbuilder.insertDeclare(pargArray, vi.dinfo, dbuilder.createExpression(addr), + topdebugloc, + ctx.builder.GetInsertBlock()); + } } } } @@ -9996,7 +9983,6 @@ jl_llvm_functions_t jl_emit_code( { JL_TIMING(CODEGEN, CODEGEN_LLVM); jl_timing_show_func_sig((jl_value_t *)li->specTypes, JL_TIMING_DEFAULT_BLOCK); - // caller must hold codegen_lock jl_llvm_functions_t decls = {}; assert((params.params == &jl_default_cgparams /* fast path */ || !params.cache || compare_cgparams(params.params, &jl_default_cgparams)) && @@ -10031,33 +10017,38 @@ jl_llvm_functions_t jl_emit_code( return decls; } +static int effects_foldable(uint32_t effects) +{ + // N.B.: This needs to be kept in sync with Core.Compiler.is_foldable(effects, true) + return ((effects & 0x7) == 0) && // is_consistent(effects) + (((effects >> 10) & 0x03) == 0) && // is_noub(effects) + (((effects >> 3) & 0x03) == 0) && // is_effect_free(effects) + ((effects >> 6) & 0x01); // is_terminates(effects) +} + static jl_llvm_functions_t jl_emit_oc_wrapper(orc::ThreadSafeModule &m, jl_codegen_params_t &params, jl_method_instance_t *mi, jl_value_t *rettype) { - Module *M = m.getModuleUnlocked(); - jl_codectx_t ctx(M->getContext(), params, 0, 0); - ctx.name = M->getModuleIdentifier().data(); - std::string funcName = get_function_name(true, false, ctx.name, ctx.emission_context.TargetTriple); jl_llvm_functions_t declarations; declarations.functionObject = "jl_f_opaque_closure_call"; if (uses_specsig(mi->specTypes, false,
rettype, true)) { + // context lock is held by params + Module *M = m.getModuleUnlocked(); + jl_codectx_t ctx(M->getContext(), params, 0, 0); + ctx.name = M->getModuleIdentifier().data(); + std::string funcName = get_function_name(true, false, ctx.name, ctx.emission_context.TargetTriple); jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, funcName, mi->specTypes, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg)); Function *gf_thunk = cast<Function>(returninfo.decl.getCallee()); jl_init_function(gf_thunk, ctx.emission_context.TargetTriple); size_t nrealargs = jl_nparams(mi->specTypes); - emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, mi->specTypes, rettype, true, nrealargs, ctx.emission_context, ctx.min_world, ctx.max_world); + emit_specsig_to_fptr1(gf_thunk, returninfo.cc, returninfo.return_roots, + mi->specTypes, rettype, true, nrealargs, ctx.emission_context, + prepare_call_in(gf_thunk->getParent(), jlopaque_closure_call_func), // TODO: this could call emit_oc_call directly + ctx.min_world, ctx.max_world); declarations.specFunctionObject = funcName; } return declarations; } -static int effects_foldable(uint32_t effects) -{ - // N.B.: This needs to be kept in sync with Core.Compiler.is_foldable(effects, true) - return ((effects & 0x7) == 0) && // is_consistent(effects) - (((effects >> 10) & 0x03) == 0) && // is_noub(effects) - (((effects >> 3) & 0x03) == 0) && // is_effect_free(effects) - ((effects >> 6) & 0x01); // is_terminates(effects) -} jl_llvm_functions_t jl_emit_codeinst( orc::ThreadSafeModule &m, @@ -10070,12 +10061,14 @@ jl_llvm_functions_t jl_emit_codeinst( JL_GC_PUSH1(&src); if (!src) { src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred); - jl_method_t *def = codeinst->def->def.method; + jl_method_instance_t *mi = codeinst->def; + jl_method_t *def = mi->def.method; // Check if this is the generic method for opaque closure wrappers - - // if so, generate the specsig -> invoke converter. + // if so, this must compile specptr such that it holds the specptr -> invoke wrapper + // to satisfy the dispatching implementation requirements of jl_f_opaque_closure_call if (def == jl_opaque_closure_method) { JL_GC_POP(); - return jl_emit_oc_wrapper(m, params, codeinst->def, codeinst->rettype); + return jl_emit_oc_wrapper(m, params, mi, codeinst->rettype); } if (src && (jl_value_t*)src != jl_nothing && jl_is_method(def)) src = jl_uncompress_ir(def, codeinst, (jl_value_t*)src); @@ -10149,135 +10142,15 @@ jl_llvm_functions_t jl_emit_codeinst( return decls; } - -void jl_compile_workqueue( - jl_codegen_params_t &params, - CompilationPolicy policy) -{ - JL_TIMING(CODEGEN, CODEGEN_Workqueue); - jl_code_info_t *src = NULL; - JL_GC_PUSH1(&src); - while (!params.workqueue.empty()) { - jl_code_instance_t *codeinst; - auto it = params.workqueue.back(); - codeinst = it.first; - auto proto = it.second; - params.workqueue.pop_back(); - // try to emit code for this item from the workqueue - StringRef preal_decl = ""; - bool preal_specsig = false; - jl_callptr_t invoke = NULL; - if (params.cache) { - // WARNING: this correctness is protected by an outer lock - uint8_t specsigflags; - void *fptr; - jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0); - //if (specsig ?
specsigflags & 0b1 : invoke == jl_fptr_args_addr) - if (invoke == jl_fptr_args_addr) { - preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); - } - else if (specsigflags & 0b1) { - preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); - preal_specsig = true; - } - } - if (preal_decl.empty()) { - auto it = params.compiled_functions.find(codeinst); - if (it == params.compiled_functions.end()) { - // Reinfer the function. The JIT came along and removed the inferred - // method body. See #34993 - if ((policy != CompilationPolicy::Default || params.params->trim) && - jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) { - // XXX: SOURCE_MODE_FORCE_SOURCE is wrong here (neither sufficient nor necessary) - codeinst = jl_type_infer(codeinst->def, jl_atomic_load_relaxed(&codeinst->max_world), SOURCE_MODE_FORCE_SOURCE); - } - if (codeinst) { - orc::ThreadSafeModule result_m = - jl_create_ts_module(name_from_method_instance(codeinst->def), - params.tsctx, params.DL, params.TargetTriple); - auto decls = jl_emit_codeinst(result_m, codeinst, NULL, params); - if (result_m) - it = params.compiled_functions.insert(std::make_pair(codeinst, std::make_pair(std::move(result_m), std::move(decls)))).first; - } - } - if (it != params.compiled_functions.end()) { - auto &decls = it->second.second; - if (decls.functionObject == "jl_fptr_args") { - preal_decl = decls.specFunctionObject; - } - else if (decls.functionObject != "jl_fptr_sparam") { - preal_decl = decls.specFunctionObject; - preal_specsig = true; - } - } - } - // patch up the prototype we emitted earlier - Module *mod = proto.decl->getParent(); - assert(proto.decl->isDeclaration()); - if (proto.specsig) { - // expected specsig - if (!preal_specsig) { - if (params.params->trim) { - auto it = params.compiled_functions.find(codeinst); //TODO: What to do about this - errs() << "Bailed out to invoke when compiling:"; - jl_(codeinst->def); - if (it != params.compiled_functions.end()) { - errs() << it->second.second.functionObject << "\n"; - errs() << it->second.second.specFunctionObject << "\n"; - } else - errs() << "codeinst not in compile_functions\n"; - } - // emit specsig-to-(jl)invoke conversion - StringRef invokeName; - if (invoke != NULL) - invokeName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst); - Function *preal = emit_tojlinvoke(codeinst, invokeName, mod, params); - proto.decl->setLinkage(GlobalVariable::InternalLinkage); - //protodecl->setAlwaysInline(); - jl_init_function(proto.decl, params.TargetTriple); - size_t nrealargs = jl_nparams(codeinst->def->specTypes); // number of actual arguments being passed - // TODO: maybe this can be cached in codeinst->specfptr? 
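
Editor's note: the effects_foldable helper, moved above jl_emit_oc_wrapper in this diff, packs its test into raw bit masks that must stay in sync with Core.Compiler.is_foldable(effects, true). A minimal, self-contained sketch of the same checks follows; the named helpers are illustrative stand-ins for Core.Compiler's accessors and are not part of the Julia C API.

    // Decodes the packed effects bits exactly as effects_foldable does in the diff.
    #include <cassert>
    #include <cstdint>

    static bool is_consistent(uint32_t effects)  { return (effects & 0x7) == 0; }
    static bool is_noub(uint32_t effects)        { return ((effects >> 10) & 0x03) == 0; }
    static bool is_effect_free(uint32_t effects) { return ((effects >> 3) & 0x03) == 0; }
    static bool is_terminates(uint32_t effects)  { return ((effects >> 6) & 0x01) != 0; }

    static bool effects_foldable(uint32_t effects)
    {
        // fold only if consistent, UB-free, effect-free, and terminating
        return is_consistent(effects) && is_noub(effects) &&
               is_effect_free(effects) && is_terminates(effects);
    }

    int main()
    {
        uint32_t e = 1u << 6;               // only the terminates bit set
        assert(effects_foldable(e));
        assert(!effects_foldable(e | 0x1)); // any consistency bit clears foldability
        return 0;
    }
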
- emit_cfunc_invalidate(proto.decl, proto.cc, proto.return_roots, codeinst->def->specTypes, codeinst->rettype, false, nrealargs, params, preal, 0, 0); - preal_decl = ""; // no need to fixup the name - } - else { - assert(!preal_decl.empty()); - } - } - else { - // expected non-specsig - if (preal_decl.empty() || preal_specsig) { - // emit jlcall1-to-(jl)invoke conversion - StringRef invokeName; - if (invoke != NULL) - invokeName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst); - preal_decl = emit_tojlinvoke(codeinst, invokeName, mod, params)->getName(); - } - } - if (!preal_decl.empty()) { - // merge and/or rename this prototype to the real function - if (Value *specfun = mod->getNamedValue(preal_decl)) { - if (proto.decl != specfun) - proto.decl->replaceAllUsesWith(specfun); - } - else { - proto.decl->setName(preal_decl); - } - } - } - JL_GC_POP(); -} - - // --- initialization --- -SmallVector, 0> gv_for_global; +static auto gv_for_global = new SmallVector, 0>(); static void global_jlvalue_to_llvm(JuliaVariable *var, jl_value_t **addr) { - gv_for_global.push_back(std::make_pair(addr, var)); + gv_for_global->push_back(std::make_pair(addr, var)); } static JuliaVariable *julia_const_gv(jl_value_t *val) { - for (auto &kv : gv_for_global) { + for (auto &kv : *gv_for_global) { if (*kv.first == val) return kv.second; } @@ -10286,6 +10159,9 @@ static JuliaVariable *julia_const_gv(jl_value_t *val) static void init_jit_functions(void) { + add_named_global("jl_fptr_args", jl_fptr_args_addr); + add_named_global("jl_fptr_sparam", jl_fptr_sparam_addr); + add_named_global("jl_f_opaque_closure_call", &jl_f_opaque_closure_call); add_named_global(jl_small_typeof_var, &jl_small_typeof); add_named_global(jlstack_chk_guard_var, &__stack_chk_guard); add_named_global(jlRTLD_DEFAULT_var, &jl_RTLD_DEFAULT_handle); diff --git a/src/debug-registry.h b/src/debug-registry.h index 4c9e13d8cd72d..4d0b7a44f19e5 100644 --- a/src/debug-registry.h +++ b/src/debug-registry.h @@ -32,7 +32,7 @@ class JITDebugInfoRegistry std::unique_lock lock; CResourceT &resource; - Lock(std::mutex &mutex, CResourceT &resource) JL_NOTSAFEPOINT : lock(mutex), resource(resource) {} + Lock(std::mutex &mutex, CResourceT &resource) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER : lock(mutex), resource(resource) {} Lock(Lock &&) JL_NOTSAFEPOINT = default; Lock &operator=(Lock &&) JL_NOTSAFEPOINT = default; @@ -56,7 +56,7 @@ class JITDebugInfoRegistry return resource; } - ~Lock() JL_NOTSAFEPOINT = default; + ~Lock() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default; }; private: @@ -68,15 +68,15 @@ class JITDebugInfoRegistry Locked(ResourceT resource = ResourceT()) JL_NOTSAFEPOINT : mutex(), resource(std::move(resource)) {} - LockT operator*() JL_NOTSAFEPOINT { + LockT operator*() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return LockT(mutex, resource); } - ConstLockT operator*() const JL_NOTSAFEPOINT { + ConstLockT operator*() const JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return ConstLockT(mutex, resource); } - ~Locked() JL_NOTSAFEPOINT = default; + ~Locked() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default; }; struct image_info_t { @@ -105,6 +105,7 @@ class JITDebugInfoRegistry std::unique_ptr object; std::unique_ptr context; LazyObjectInfo() = delete; + ~LazyObjectInfo() JL_NOTSAFEPOINT = default; }; struct SectionInfo { @@ -113,6 +114,7 @@ class JITDebugInfoRegistry ptrdiff_t slide; uint64_t SectionIndex; SectionInfo() = delete; + ~SectionInfo() JL_NOTSAFEPOINT = default; }; template @@ -145,7 +147,7 @@ class 
JITDebugInfoRegistry void add_code_in_flight(llvm::StringRef name, jl_code_instance_t *codeinst, const llvm::DataLayout &DL) JL_NOTSAFEPOINT; jl_method_instance_t *lookupLinfo(size_t pointer) JL_NOTSAFEPOINT; void registerJITObject(const llvm::object::ObjectFile &Object, - std::function getLoadAddress); + std::function getLoadAddress) JL_NOTSAFEPOINT; objectmap_t& getObjectMap() JL_NOTSAFEPOINT; void add_image_info(image_info_t info) JL_NOTSAFEPOINT; bool get_image_info(uint64_t base, image_info_t *info) const JL_NOTSAFEPOINT; diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp index f6fca47e9a889..31f1ba8281a89 100644 --- a/src/debuginfo.cpp +++ b/src/debuginfo.cpp @@ -58,7 +58,7 @@ extern "C" void __register_frame(void*) JL_NOTSAFEPOINT; extern "C" void __deregister_frame(void*) JL_NOTSAFEPOINT; template -static void processFDEs(const char *EHFrameAddr, size_t EHFrameSize, callback f) +static void processFDEs(const char *EHFrameAddr, size_t EHFrameSize, callback f) JL_NOTSAFEPOINT { const char *P = EHFrameAddr; const char *End = P + EHFrameSize; @@ -164,6 +164,12 @@ static void jl_profile_atomic(T f) JL_NOTSAFEPOINT // --- storing and accessing source location metadata --- void jl_add_code_in_flight(StringRef name, jl_code_instance_t *codeinst, const DataLayout &DL) { + // Non-opaque-closure MethodInstances are considered globally rooted + // through their methods, but for OC, we need to create a global root + // here. + jl_method_instance_t *mi = codeinst->def; + if (jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure) + jl_as_global_root((jl_value_t*)mi, 1); getJITDebugRegistry().add_code_in_flight(name, codeinst, DL); } @@ -369,11 +375,6 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object, if (codeinst) { JL_GC_PROMISE_ROOTED(codeinst); mi = codeinst->def; - // Non-opaque-closure MethodInstances are considered globally rooted - // through their methods, but for OC, we need to create a global root - // here. - if (jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure) - mi = (jl_method_instance_t*)jl_as_global_root((jl_value_t*)mi, 1); } jl_profile_atomic([&]() JL_NOTSAFEPOINT { if (mi) @@ -1281,14 +1282,14 @@ void register_eh_frames(uint8_t *Addr, size_t Size) { // On OS X OS X __register_frame takes a single FDE as an argument. // See http://lists.cs.uiuc.edu/pipermail/llvmdev/2013-April/061768.html - processFDEs((char*)Addr, Size, [](const char *Entry) { + processFDEs((char*)Addr, Size, [](const char *Entry) JL_NOTSAFEPOINT { getJITDebugRegistry().libc_frames.libc_register_frame(Entry); }); } void deregister_eh_frames(uint8_t *Addr, size_t Size) { - processFDEs((char*)Addr, Size, [](const char *Entry) { + processFDEs((char*)Addr, Size, [](const char *Entry) JL_NOTSAFEPOINT { getJITDebugRegistry().libc_frames.libc_deregister_frame(Entry); }); } @@ -1300,7 +1301,7 @@ void deregister_eh_frames(uint8_t *Addr, size_t Size) // Skip over an arbitrary long LEB128 encoding. // Return the pointer to the first unprocessed byte. -static const uint8_t *consume_leb128(const uint8_t *Addr, const uint8_t *End) +static const uint8_t *consume_leb128(const uint8_t *Addr, const uint8_t *End) JL_NOTSAFEPOINT { const uint8_t *P = Addr; while ((*P >> 7) != 0 && P < End) @@ -1312,7 +1313,7 @@ static const uint8_t *consume_leb128(const uint8_t *Addr, const uint8_t *End) // bytes than what there are more bytes than what the type can store. // Adjust the pointer to the first unprocessed byte. 
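
Editor's note: consume_leb128 and parse_leb128 below walk DWARF's variable-length LEB128 encoding (parse_leb128 additionally handles signed types via std::make_unsigned). For reference, a self-contained sketch of the unsigned decoding; it assumes well-formed input, whereas the real parsers also bounds-check against End.

    #include <cassert>
    #include <cstdint>

    // ULEB128: little-endian groups of 7 payload bits; the high bit of each
    // byte flags whether another byte follows.
    static uint64_t decode_uleb128(const uint8_t *&p)
    {
        uint64_t value = 0;
        unsigned shift = 0;
        uint8_t byte;
        do {
            byte = *p++;
            value |= uint64_t(byte & 0x7f) << shift;
            shift += 7;
        } while (byte & 0x80);
        return value;
    }

    int main()
    {
        const uint8_t buf[] = {0xe5, 0x8e, 0x26}; // encodes 624485
        const uint8_t *p = buf;
        assert(decode_uleb128(p) == 624485);
        assert(p == buf + 3); // pointer is left at the first unprocessed byte
        return 0;
    }
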
template static T parse_leb128(const uint8_t *&Addr, - const uint8_t *End) + const uint8_t *End) JL_NOTSAFEPOINT { typedef typename std::make_unsigned::type uT; uT v = 0; @@ -1335,7 +1336,7 @@ template static T parse_leb128(const uint8_t *&Addr, } template -static U safe_trunc(T t) +static U safe_trunc(T t) JL_NOTSAFEPOINT { assert((t >= static_cast(std::numeric_limits::min())) && (t <= static_cast(std::numeric_limits::max()))); @@ -1375,7 +1376,7 @@ enum DW_EH_PE : uint8_t { }; // Parse the CIE and return the type of encoding used by FDE -static DW_EH_PE parseCIE(const uint8_t *Addr, const uint8_t *End) +static DW_EH_PE parseCIE(const uint8_t *Addr, const uint8_t *End) JL_NOTSAFEPOINT { // https://www.airs.com/blog/archives/460 // Length (4 bytes) @@ -1481,7 +1482,7 @@ void register_eh_frames(uint8_t *Addr, size_t Size) // Now first count the number of FDEs size_t nentries = 0; - processFDEs((char*)Addr, Size, [&](const char*){ nentries++; }); + processFDEs((char*)Addr, Size, [&](const char*) JL_NOTSAFEPOINT { nentries++; }); if (nentries == 0) return; @@ -1510,7 +1511,7 @@ void register_eh_frames(uint8_t *Addr, size_t Size) // CIE's (may not happen) without parsing it every time. const uint8_t *cur_cie = nullptr; DW_EH_PE encoding = DW_EH_PE_omit; - processFDEs((char*)Addr, Size, [&](const char *Entry) { + processFDEs((char*)Addr, Size, [&](const char *Entry) JL_NOTSAFEPOINT { // Skip Length (4bytes) and CIE offset (4bytes) uint32_t fde_size = *(const uint32_t*)Entry; uint32_t cie_id = ((const uint32_t*)Entry)[1]; @@ -1631,7 +1632,7 @@ void deregister_eh_frames(uint8_t *Addr, size_t Size) #endif extern "C" JL_DLLEXPORT_CODEGEN -uint64_t jl_getUnwindInfo_impl(uint64_t dwAddr) +uint64_t jl_getUnwindInfo_impl(uint64_t dwAddr) JL_NOTSAFEPOINT { // Might be called from unmanaged thread jl_lock_profile(); diff --git a/src/engine.cpp b/src/engine.cpp index 6db4dce44e48e..2b68de731c4dd 100644 --- a/src/engine.cpp +++ b/src/engine.cpp @@ -45,8 +45,8 @@ template<> struct llvm::DenseMapInfo { } }; -static std::mutex engine_lock; -static std::condition_variable engine_wait; +static std::mutex engine_lock; // n.b. 
this lock is only ever held briefly
+static std::condition_variable engine_wait; // but it may be waiting a while in this state
 // map from MethodInstance to threadid that owns it currently for inference
 static DenseMap<InferKey, ReservationInfo> Reservations;
 // vector of which threads are blocked and which lease they need
@@ -63,55 +63,51 @@ jl_code_instance_t *jl_engine_reserve(jl_method_instance_t *m, jl_value_t *owner
     ct->ptls->engine_nqueued++; // disables finalizers until inference is finished on this method graph
     jl_code_instance_t *ci = jl_new_codeinst_uninit(m, owner); // allocate a placeholder
     JL_GC_PUSH1(&ci);
-    int8_t gc_state = jl_gc_safe_enter(ct->ptls);
-    InferKey key = {m, owner};
-    std::unique_lock<std::mutex> lock(engine_lock);
     auto tid = jl_atomic_load_relaxed(&ct->tid);
-    if ((signed)Awaiting.size() < tid + 1)
-        Awaiting.resize(tid + 1);
-    while (1) {
-        auto record = Reservations.find(key);
-        if (record == Reservations.end()) {
-            Reservations[key] = ReservationInfo{tid, ci};
-            lock.unlock();
-            jl_gc_safe_leave(ct->ptls, gc_state); // contains jl_gc_safepoint
-            JL_GC_POP();
-            return ci;
-        }
-        // before waiting, need to run deadlock/cycle detection
-        // there is a cycle if the thread holding our lease is blocked
-        // and waiting for (transitively) any lease that is held by this thread
-        auto wait_tid = record->second.tid;
-        while (1) {
-            if (wait_tid == tid) {
-                lock.unlock();
-                jl_gc_safe_leave(ct->ptls, gc_state); // contains jl_gc_safepoint
-                JL_GC_POP();
-                ct->ptls->engine_nqueued--;
-                return ci; // break the cycle
+    if (([tid, m, owner, ci] () -> bool { // necessary scope block / lambda for unique_lock
+        jl_unique_gcsafe_lock lock(engine_lock);
+        InferKey key{m, owner};
+        if ((signed)Awaiting.size() < tid + 1)
+            Awaiting.resize(tid + 1);
+        while (1) {
+            auto record = Reservations.find(key);
+            if (record == Reservations.end()) {
+                Reservations[key] = ReservationInfo{tid, ci};
+                return false;
+            }
+            // before waiting, need to run deadlock/cycle detection
+            // there is a cycle if the thread holding our lease is blocked
+            // and waiting for (transitively) any lease that is held by this thread
+            auto wait_tid = record->second.tid;
+            while (1) {
+                if (wait_tid == tid)
+                    return true;
+                if ((signed)Awaiting.size() <= wait_tid)
+                    break; // no cycle, since it is running (and this should be unreachable)
+                auto key2 = Awaiting[wait_tid];
+                if (key2.mi == nullptr)
+                    break; // no cycle, since it is running
+                auto record2 = Reservations.find(key2);
+                if (record2 == Reservations.end())
+                    break; // no cycle, since it is about to resume
+                assert(wait_tid != record2->second.tid);
+                wait_tid = record2->second.tid;
+            }
+            Awaiting[tid] = key;
+            lock.wait(engine_wait);
+            Awaiting[tid] = InferKey{};
             }
-            if ((signed)Awaiting.size() <= wait_tid)
-                break; // no cycle, since it is running (and this should be unreachable)
-            auto key2 = Awaiting[wait_tid];
-            if (key2.mi == nullptr)
-                break; // no cycle, since it is running
-            auto record2 = Reservations.find(key2);
-            if (record2 == Reservations.end())
-                break; // no cycle, since it is about to resume
-            assert(wait_tid != record2->second.tid);
-            wait_tid = record2->second.tid;
-        }
-        Awaiting[tid] = key;
-        engine_wait.wait(lock);
-        Awaiting[tid] = InferKey{};
-    }
+    })())
+        ct->ptls->engine_nqueued--;
+    JL_GC_POP();
+    return ci;
 }
 
 int jl_engine_hasreserved(jl_method_instance_t *m, jl_value_t *owner)
 {
     jl_task_t *ct = jl_current_task;
     InferKey key = {m, owner};
-    std::unique_lock<std::mutex> lock(engine_lock);
+    std::unique_lock lock(engine_lock);
     auto record = Reservations.find(key);
     return record != Reservations.end() && record->second.tid == jl_atomic_load_relaxed(&ct->tid);
 }
@@ -123,7 +119,7 @@ STATIC_INLINE int gc_marked(uintptr_t bits) JL_NOTSAFEPOINT
 
 void jl_engine_sweep(jl_ptls_t *gc_all_tls_states)
 {
-    std::unique_lock<std::mutex> lock(engine_lock);
+    std::unique_lock lock(engine_lock);
     bool any = false;
     for (auto I = Reservations.begin(); I != Reservations.end(); ++I) {
         jl_code_instance_t *ci = I->second.ci;
@@ -142,7 +138,7 @@ void jl_engine_sweep(jl_ptls_t *gc_all_tls_states)
 void jl_engine_fulfill(jl_code_instance_t *ci, jl_code_info_t *src)
 {
     jl_task_t *ct = jl_current_task;
-    std::unique_lock<std::mutex> lock(engine_lock);
+    std::unique_lock lock(engine_lock);
     auto record = Reservations.find(InferKey{ci->def, ci->owner});
     if (record == Reservations.end() || record->second.ci != ci)
         return;
@@ -152,7 +148,6 @@ void jl_engine_fulfill(jl_code_instance_t *ci, jl_code_info_t *src)
     engine_wait.notify_all();
 }
 
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gf.c b/src/gf.c
index fc2e62ebff96b..e77c950c38ae4 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -3222,6 +3222,23 @@ jl_value_t *jl_argtype_with_function_type(jl_value_t *ft JL_MAYBE_UNROOTED, jl_v
     return tt;
 }
 
+// undo jl_argtype_with_function transform
+jl_value_t *jl_argtype_without_function(jl_value_t *ftypes)
+{
+    jl_value_t *types = jl_unwrap_unionall(ftypes);
+    size_t l = jl_nparams(types);
+    if (l == 1 && jl_is_vararg(jl_tparam0(types)))
+        return ftypes;
+    jl_value_t *tt = (jl_value_t*)jl_alloc_svec(l - 1);
+    JL_GC_PUSH1(&tt);
+    for (size_t i = 1; i < l; i++)
+        jl_svecset(tt, i - 1, jl_tparam(types, i));
+    tt = (jl_value_t*)jl_apply_tuple_type((jl_svec_t*)tt, 0);
+    tt = jl_rewrap_unionall_(tt, types);
+    JL_GC_POP();
+    return tt;
+}
+
 #ifdef JL_TRACE
 static int trace_en = 0;
 static int error_en = 1;
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index 313449dda5557..8b8004af03616 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -64,9 +64,6 @@ using namespace llvm;
 #define DEBUG_TYPE "julia_jitlayers"
 
 STATISTIC(LinkedGlobals, "Number of globals linked");
-STATISTIC(CompiledCodeinsts, "Number of codeinsts compiled directly");
-STATISTIC(MaxWorkqueueSize, "Maximum number of elements in the workqueue");
-STATISTIC(IndirectCodeinsts, "Number of dependent codeinsts compiled");
 STATISTIC(SpecFPtrCount, "Number of specialized function pointers compiled");
 STATISTIC(UnspecFPtrCount, "Number of unspecialized function pointers compiled");
 STATISTIC(ModulesAdded, "Number of modules added to the JIT");
@@ -151,13 +148,6 @@ void jl_dump_llvm_opt_impl(void *s)
     **jl_ExecutionEngine->get_dump_llvm_opt_stream() = (ios_t*)s;
 }
 
-#ifndef JL_USE_JITLINK
-static int jl_add_to_ee(
-        orc::ThreadSafeModule &M,
-        const StringMap<orc::ThreadSafeModule*> &NewExports,
-        DenseMap<orc::ThreadSafeModule*, int> &Queued,
-        SmallVectorImpl<orc::ThreadSafeModule*> &Stack) JL_NOTSAFEPOINT;
-#endif
 static void jl_decorate_module(Module &M) JL_NOTSAFEPOINT;
 
 void jl_link_global(GlobalVariable *GV, void *addr) JL_NOTSAFEPOINT
@@ -187,214 +177,536 @@ void jl_jit_globals(std::map<void *, GlobalVariable*> &globals) JL_NOTSAFEPOINT
     }
 }
 
-// this generates llvm code for the lambda info
-// and adds the result to the jitlayers
-// (and the shadow module),
-// and generates code for it
-static jl_callptr_t _jl_compile_codeinst(
-        jl_code_instance_t *codeinst,
-        jl_code_info_t *src,
-        orc::ThreadSafeContext context)
+ // lock for places where only single threaded behavior is implemented, so we need GC support
+static jl_mutex_t jitlock;
+ // locks for adding external code to the JIT atomically
+static std::mutex extern_c_lock;
+ // locks and barriers for this state
+static std::mutex 
engine_lock; +static std::condition_variable engine_wait; +static int threads_in_compiler_phase; + // the TSM for each codeinst +static SmallVector sharedmodules; +static DenseMap emittedmodules; + // the invoke and specsig function names in the JIT +static DenseMap invokenames; + // everything that any thread wants to compile right now +static DenseSet compileready; + // everything that any thread has compiled recently +static DenseSet linkready; + // a map from a codeinst to the outgoing edges needed before linking it +static DenseMap> complete_graph; + // the state for each codeinst and the number of unresolved edges (we don't + // really need this once JITLink is available everywhere, since every module + // is automatically complete, and we can emit any required fixups later as a + // separate module) +static DenseMap> incompletemodules; + // the set of incoming unresolved edges resolved by a codeinstance +static DenseMap> incomplete_rgraph; + +// Lock hierarchy here: +// jitlock is outermost, can contain others and allows GC +// engine_lock is next +// ThreadSafeContext locks are next, they should not be nested (unless engine_lock is also held, but this may make TSAN sad anyways) +// extern_c_lock is next +// jl_ExecutionEngine internal locks are exclusive to this list, since OrcJIT promises to never hold a lock over a materialization unit: +// construct a query object from a query set and query handler +// lock the session +// lodge query against requested symbols, collect required materializers (if any) +// unlock the session +// dispatch materializers (if any) +// However, this guarantee relies on Julia releasing all TSC locks before causing any materialization units to be dispatched +// as materialization may need to acquire TSC locks. + + +static void finish_params(Module *M, jl_codegen_params_t ¶ms) JL_NOTSAFEPOINT { - // caller must hold codegen_lock - // and have disabled finalizers - uint64_t start_time = 0; - bool timed = !!*jl_ExecutionEngine->get_dump_compiles_stream(); - if (timed) - start_time = jl_hrtime(); + if (params._shared_module) { + sharedmodules.push_back(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); + } + + // In imaging mode, we can't inline global variable initializers in order to preserve + // the fiction that we don't know what loads from the global will return. Thus, we + // need to emit a separate module for the globals before any functions are compiled, + // to ensure that the globals are defined when they are compiled. 
+ if (params.imaging_mode) { + if (!params.global_targets.empty()) { + void **globalslots = new void*[params.global_targets.size()]; + void **slot = globalslots; + for (auto &global : params.global_targets) { + auto GV = global.second; + *slot = global.first; + jl_ExecutionEngine->addGlobalMapping(GV->getName(), (uintptr_t)slot); + slot++; + } +#ifdef __clang_analyzer__ + static void **leaker = globalslots; // for the purpose of the analyzer, we need to expressly leak this variable or it thinks we forgot to free it +#endif + } + } + else { + StringMap NewGlobals; + for (auto &global : params.global_targets) { + NewGlobals[global.second->getName()] = global.first; + } + for (auto &GV : M->globals()) { + auto InitValue = NewGlobals.find(GV.getName()); + if (InitValue != NewGlobals.end()) { + jl_link_global(&GV, InitValue->second); + } + } + } +} - assert(jl_is_code_instance(codeinst)); - JL_TIMING(CODEINST_COMPILE, CODEINST_COMPILE); - jl_callptr_t fptr = NULL; - // emit the code in LLVM IR form - jl_codegen_params_t params(std::move(context), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context - params.cache = true; - params.imaging_mode = imaging_default(); - params.debug_level = jl_options.debug_level; - { - orc::ThreadSafeModule result_m = - jl_create_ts_module(name_from_method_instance(codeinst->def), params.tsctx, params.DL, params.TargetTriple); - jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params); - if (result_m) - params.compiled_functions[codeinst] = {std::move(result_m), std::move(decls)}; - jl_compile_workqueue(params, CompilationPolicy::Default); - - if (params._shared_module) { - jl_ExecutionEngine->optimizeDLSyms(*params._shared_module); - jl_ExecutionEngine->addModule(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); +static int jl_analyze_workqueue(jl_code_instance_t *callee, jl_codegen_params_t ¶ms, bool forceall=false) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER +{ + jl_task_t *ct = jl_current_task; + decltype(params.workqueue) edges; + std::swap(params.workqueue, edges); + for (auto &it : edges) { + jl_code_instance_t *codeinst = it.first; + auto &proto = it.second; + // try to emit code for this item from the workqueue + StringRef invokeName = ""; + StringRef preal_decl = ""; + bool preal_specsig = false; + jl_callptr_t invoke = nullptr; + bool isedge = false; + assert(params.cache); + // Checking the cache here is merely an optimization and not strictly required + // But it must be consistent with the following invokenames lookup, which is protected by the engine_lock + uint8_t specsigflags; + void *fptr; + jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0); + //if (specsig ? specsigflags & 0b1 : invoke == jl_fptr_args_addr) + if (invoke == jl_fptr_args_addr) { + preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); } - - // In imaging mode, we can't inline global variable initializers in order to preserve - // the fiction that we don't know what loads from the global will return. Thus, we - // need to emit a separate module for the globals before any functions are compiled, - // to ensure that the globals are defined when they are compiled. 
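
Editor's note: the imaging-mode branch of finish_params above gives every global a slot in an intentionally leaked array and binds the JIT symbol to the slot's address, so generated code loads the value indirectly instead of baking it in. A toy model of that indirection follows; bind_globals and symbol_table are hypothetical stand-ins for finish_params and addGlobalMapping.

    #include <cstdio>
    #include <map>
    #include <string>

    static std::map<std::string, void**> symbol_table; // stand-in for the JIT symbol table

    static void bind_globals(const std::map<std::string, void*> &global_targets)
    {
        // intentionally leaked, like globalslots in the diff
        void **slots = new void*[global_targets.size()];
        size_t i = 0;
        for (auto &kv : global_targets) {
            slots[i] = kv.second;               // the current value lives in the slot
            symbol_table[kv.first] = &slots[i]; // generated code references the slot
            i++;
        }
    }

    int main()
    {
        int value = 42;
        bind_globals({{"my_global", &value}});
        // a load of "my_global" in generated code becomes a load through the slot
        void *loaded = *symbol_table["my_global"];
        std::printf("%d\n", *(int*)loaded); // prints 42
        return 0;
    }
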
- if (params.imaging_mode) { - // Won't contain any PLT/dlsym calls, so no need to optimize those - if (!params.global_targets.empty()) { - void **globalslots = new void*[params.global_targets.size()]; - void **slot = globalslots; - for (auto &global : params.global_targets) { - auto GV = global.second; - *slot = global.first; - jl_ExecutionEngine->addGlobalMapping(GV->getName(), (uintptr_t)slot); - slot++; + else if (specsigflags & 0b1) { + preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); + preal_specsig = true; + } + bool force = forceall || invoke != nullptr; + if (preal_decl.empty()) { + auto it = invokenames.find(codeinst); + if (it != invokenames.end()) { + auto &decls = it->second; + invokeName = decls.functionObject; + if (decls.functionObject == "jl_fptr_args") { + preal_decl = decls.specFunctionObject; + isedge = true; } -#ifdef __clang_analyzer__ - static void **leaker = globalslots; // for the purpose of the analyzer, we need to expressly leak this variable or it thinks we forgot to free it -#endif + else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call") { + preal_decl = decls.specFunctionObject; + preal_specsig = true; + isedge = true; + } + force = true; } } - else { - StringMap NewGlobals; - for (auto &global : params.global_targets) { - NewGlobals[global.second->getName()] = global.first; + if (!preal_decl.empty() || force) { + // if we have a prototype emitted, compare it to what we emitted earlier + Module *mod = proto.decl->getParent(); + assert(proto.decl->isDeclaration()); + Function *pinvoke = nullptr; + if (preal_decl.empty()) { + if (invoke != nullptr && invokeName.empty()) { + assert(invoke != jl_fptr_args_addr); + if (invoke == jl_fptr_sparam_addr) + invokeName = "jl_fptr_sparam"; + else if (invoke == jl_f_opaque_closure_call_addr) + invokeName = "jl_f_opaque_closure_call"; + else + invokeName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst); + } + pinvoke = emit_tojlinvoke(codeinst, invokeName, mod, params); + if (!proto.specsig) + proto.decl->replaceAllUsesWith(pinvoke); + isedge = false; } - for (auto &def : params.compiled_functions) { - auto M = std::get<0>(def.second).getModuleUnlocked(); - for (auto &GV : M->globals()) { - auto InitValue = NewGlobals.find(GV.getName()); - if (InitValue != NewGlobals.end()) { - jl_link_global(&GV, InitValue->second); - } + if (proto.specsig && !preal_specsig) { + // get or build an fptr1 that can invoke codeinst + if (pinvoke == nullptr) + pinvoke = get_or_emit_fptr1(preal_decl, mod); + // emit specsig-to-(jl)invoke conversion + proto.decl->setLinkage(GlobalVariable::InternalLinkage); + //protodecl->setAlwaysInline(); + jl_init_function(proto.decl, params.TargetTriple); + // TODO: maybe this can be cached in codeinst->specfptr? 
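
Editor's note: emit_specsig_to_fptr1, called just below, emits (as LLVM IR) an adapter from the native specsig calling convention to a generic boxed entry point. A hand-written C++ analogue of what such an adapter does; Value, fptr1_t, and the function names are illustrative, not Julia's actual types.

    #include <cassert>
    #include <cstddef>

    struct Value { long payload; };              // stand-in for a boxed jl_value_t
    typedef Value *(*fptr1_t)(Value *f, Value **args, size_t nargs);

    // generic boxed callee, analogous to an invoke/fptr1 entry point
    static Value *generic_add(Value *, Value **args, size_t nargs)
    {
        assert(nargs == 2);
        static Value result;                     // simplification: not thread-safe
        result.payload = args[0]->payload + args[1]->payload;
        return &result;
    }

    // the adapter: native arguments in, boxed call out, unboxed result back
    static long specsig_add(long a, long b)
    {
        Value boxed_a{a}, boxed_b{b};
        Value *argv[2] = {&boxed_a, &boxed_b};
        return generic_add(nullptr, argv, 2)->payload;
    }

    int main()
    {
        assert(specsig_add(2, 3) == 5);
        return 0;
    }
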
+ int8_t gc_state = jl_gc_unsafe_enter(ct->ptls); // codegen may contain safepoints (such as jl_subtype calls) + jl_method_instance_t *mi = codeinst->def; + size_t nrealargs = jl_nparams(mi->specTypes); // number of actual arguments being passed + bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; + emit_specsig_to_fptr1(proto.decl, proto.cc, proto.return_roots, mi->specTypes, codeinst->rettype, is_opaque_closure, nrealargs, params, pinvoke, 0, 0); + jl_gc_unsafe_leave(ct->ptls, gc_state); + preal_decl = ""; // no need to fixup the name + } + if (!preal_decl.empty()) { + // merge and/or rename this prototype to the real function + if (Value *specfun = mod->getNamedValue(preal_decl)) { + if (proto.decl != specfun) + proto.decl->replaceAllUsesWith(specfun); + } + else { + proto.decl->setName(preal_decl); } } - } - -#ifndef JL_USE_JITLINK - // Collect the exported functions from the params.compiled_functions modules, - // which form dependencies on which functions need to be - // compiled first. Cycles of functions are compiled together. - // (essentially we compile a DAG of SCCs in reverse topological order, - // if we treat declarations of external functions as edges from declaration - // to definition) - StringMap NewExports; - for (auto &def : params.compiled_functions) { - orc::ThreadSafeModule &TSM = std::get<0>(def.second); - //The underlying context object is still locked because params is not destroyed yet - auto M = TSM.getModuleUnlocked(); - jl_ExecutionEngine->optimizeDLSyms(*M); - for (auto &F : M->global_objects()) { - if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { - NewExports[F.getName()] = &TSM; + if (proto.oc) { // additionally, if we are dealing with an OC constructor, then we might also need to fix up the fptr1 reference too + assert(proto.specsig); + StringRef ocinvokeDecl = invokeName; + if (invoke != nullptr && ocinvokeDecl.empty()) { + // check for some special tokens used by opaque_closure.c and convert those to their real functions + assert(invoke != jl_fptr_args_addr); + assert(invoke != jl_fptr_sparam_addr); + if (invoke == jl_fptr_interpret_call_addr) + ocinvokeDecl = "jl_fptr_interpret_call"; + else if (invoke == jl_fptr_const_return_addr) + ocinvokeDecl = "jl_fptr_const_return"; + else if (invoke == jl_f_opaque_closure_call_addr) + ocinvokeDecl = "jl_f_opaque_closure_call"; + //else if (invoke == jl_interpret_opaque_closure_addr) + else + ocinvokeDecl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst); + } + // if OC expected a specialized specsig dispatch, but we don't have it, use the inner trampoline here too + // XXX: this invoke translation logic is supposed to exactly match new_opaque_closure + if (!preal_specsig || ocinvokeDecl == "jl_f_opaque_closure_call" || ocinvokeDecl == "jl_fptr_interpret_call" || ocinvokeDecl == "jl_fptr_const_return") { + if (pinvoke == nullptr) + ocinvokeDecl = get_or_emit_fptr1(preal_decl, mod)->getName(); + else + ocinvokeDecl = pinvoke->getName(); + } + assert(!ocinvokeDecl.empty()); + assert(ocinvokeDecl != "jl_fptr_args"); + assert(ocinvokeDecl != "jl_fptr_sparam"); + // merge and/or rename this prototype to the real function + if (Value *specfun = mod->getNamedValue(ocinvokeDecl)) { + if (proto.oc != specfun) + proto.oc->replaceAllUsesWith(specfun); + } + else { + proto.oc->setName(ocinvokeDecl); } } } - DenseMap Queued; - SmallVector Stack; - for (auto &def : params.compiled_functions) { - // Add the results to the 
execution engine now - orc::ThreadSafeModule &M = std::get<0>(def.second); - jl_add_to_ee(M, NewExports, Queued, Stack); - assert(Queued.empty() && Stack.empty() && !M); + else { + isedge = true; + params.workqueue.push_back(it); + incomplete_rgraph[codeinst].push_back(callee); } -#else - for (auto &def : params.compiled_functions) { - // Add the results to the execution engine now - orc::ThreadSafeModule &M = std::get<0>(def.second); - if (M) - jl_ExecutionEngine->addModule(std::move(M)); + if (isedge) + complete_graph[callee].push_back(codeinst); + } + return params.workqueue.size(); +} + +// test whether codeinst->invoke is usable already without further compilation needed +static bool jl_is_compiled_codeinst(jl_code_instance_t *codeinst) JL_NOTSAFEPOINT +{ + auto invoke = jl_atomic_load_relaxed(&codeinst->invoke); + if (invoke == nullptr || invoke == jl_fptr_wait_for_compiled_addr) + return false; + return true; +} + +// move codeinst (and deps) from incompletemodules to emitted modules +// and populate compileready from complete_graph +static void prepare_compile(jl_code_instance_t *codeinst) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER +{ + SmallVector workqueue; + workqueue.push_back(codeinst); + while (!workqueue.empty()) { + codeinst = workqueue.pop_back_val(); + if (!invokenames.count(codeinst)) { + // this means it should be compiled already while the callee was in stasis + assert(jl_is_compiled_codeinst(codeinst)); + continue; } -#endif - ++CompiledCodeinsts; - MaxWorkqueueSize.updateMax(params.compiled_functions.size()); - IndirectCodeinsts += params.compiled_functions.size() - 1; - } - - // batch compile job for all new functions - SmallVector NewDefs; - for (auto &def : params.compiled_functions) { - jl_llvm_functions_t &decls = std::get<1>(def.second); - if (decls.functionObject != "jl_fptr_args" && - decls.functionObject != "jl_fptr_sparam" && - decls.functionObject != "jl_f_opaque_closure_call") - NewDefs.push_back(decls.functionObject); - if (!decls.specFunctionObject.empty()) - NewDefs.push_back(decls.specFunctionObject); - } - auto Addrs = jl_ExecutionEngine->findSymbols(NewDefs); - - size_t i = 0; - size_t nextaddr = 0; - for (auto &def : params.compiled_functions) { - jl_code_instance_t *this_code = def.first; - if (i < jl_timing_print_limit) - jl_timing_show_func_sig(this_code->def->specTypes, JL_TIMING_DEFAULT_BLOCK); - - jl_llvm_functions_t &decls = std::get<1>(def.second); - jl_callptr_t addr; - bool isspecsig = false; - if (decls.functionObject == "jl_fptr_args") { - addr = jl_fptr_args_addr; + // if this was incomplete, force completion now of it + auto it = incompletemodules.find(codeinst); + if (it != incompletemodules.end()) { + int waiting = 0; + auto &edges = complete_graph[codeinst]; + auto edges_end = std::remove_if(edges.begin(), edges.end(), [&waiting, codeinst] (jl_code_instance_t *edge) JL_NOTSAFEPOINT -> bool { + auto &redges = incomplete_rgraph[edge]; + // waiting += std::erase(redges, codeinst); + auto redges_end = std::remove(redges.begin(), redges.end(), codeinst); + if (redges_end != redges.end()) { + waiting += redges.end() - redges_end; + redges.erase(redges_end, redges.end()); + assert(!invokenames.count(edge)); + } + return !invokenames.count(edge); + }); + edges.erase(edges_end, edges.end()); + assert(waiting == std::get<1>(it->second)); + std::get<1>(it->second) = 0; + auto ¶ms = std::get<0>(it->second); + params.tsctx_lock = params.tsctx.getLock(); + waiting = jl_analyze_workqueue(codeinst, params, true); // may safepoint + 
assert(!waiting); (void)waiting; + Module *M = emittedmodules[codeinst].getModuleUnlocked(); + finish_params(M, params); + incompletemodules.erase(it); + } + // and then indicate this should be compiled now + if (!linkready.count(codeinst) && compileready.insert(codeinst).second) { + auto edges = complete_graph.find(codeinst); + if (edges != complete_graph.end()) { + workqueue.append(edges->second); + } + } + } +} + +// notify any other pending work that this edge now has code defined +static void complete_emit(jl_code_instance_t *edge) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER +{ + auto notify = incomplete_rgraph.find(edge); + if (notify == incomplete_rgraph.end()) + return; + auto redges = std::move(notify->second); + incomplete_rgraph.erase(notify); + for (size_t i = 0; i < redges.size(); i++) { + jl_code_instance_t *callee = redges[i]; + auto it = incompletemodules.find(callee); + assert(it != incompletemodules.end()); + if (--std::get<1>(it->second) == 0) { + auto ¶ms = std::get<0>(it->second); + params.tsctx_lock = params.tsctx.getLock(); + assert(callee == it->first); + int waiting = jl_analyze_workqueue(callee, params); // may safepoint + assert(!waiting); (void)waiting; + Module *M = emittedmodules[callee].getModuleUnlocked(); + finish_params(M, params); + incompletemodules.erase(it); } - else if (decls.functionObject == "jl_fptr_sparam") { - addr = jl_fptr_sparam_addr; + } +} + + +// set the invoke field for codeinst (and all deps, and assist with other pending work from other threads) now +static void jl_compile_codeinst_now(jl_code_instance_t *codeinst) +{ + jl_unique_gcsafe_lock lock(engine_lock); + if (!invokenames.count(codeinst)) + return; + threads_in_compiler_phase++; + prepare_compile(codeinst); // may safepoint + while (1) { + // TODO: split up this work by ThreadSafeContext, so two threads don't need to get the same locks and stall + if (!sharedmodules.empty()) { + auto TSM = sharedmodules.pop_back_val(); + lock.native.unlock(); + { + auto Lock = TSM.getContext().getLock(); + jl_ExecutionEngine->optimizeDLSyms(*TSM.getModuleUnlocked()); // may safepoint + } + jl_ExecutionEngine->addModule(std::move(TSM)); + lock.native.lock(); } - else if (decls.functionObject == "jl_f_opaque_closure_call") { - addr = jl_f_opaque_closure_call_addr; + else if (!compileready.empty()) { + // move a function from compileready to linkready then compile it + auto compilenext = compileready.begin(); + codeinst = *compilenext; + compileready.erase(compilenext); + auto TSMref = emittedmodules.find(codeinst); + assert(TSMref != emittedmodules.end()); + auto TSM = std::move(TSMref->second); + linkready.insert(codeinst); + emittedmodules.erase(TSMref); + lock.native.unlock(); + uint64_t start_time = jl_hrtime(); + { + auto Lock = TSM.getContext().getLock(); + jl_ExecutionEngine->optimizeDLSyms(*TSM.getModuleUnlocked()); // may safepoint + } + jl_ExecutionEngine->addModule(std::move(TSM)); // may safepoint + // If logging of the compilation stream is enabled, + // then dump the method-instance specialization type to the stream + jl_method_instance_t *mi = codeinst->def; + if (jl_is_method(mi->def.method)) { + auto stream = *jl_ExecutionEngine->get_dump_compiles_stream(); + if (stream) { + uint64_t end_time = jl_hrtime(); + ios_printf(stream, "%" PRIu64 "\t\"", end_time - start_time); + jl_static_show((JL_STREAM*)stream, mi->specTypes); + ios_printf(stream, "\"\n"); + } + } + lock.native.lock(); } else { - assert(NewDefs[nextaddr] == decls.functionObject); - addr = (jl_callptr_t)Addrs[nextaddr++]; 
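
Editor's note: complete_emit above is the notification half of the incompletemodules bookkeeping: every stalled caller counts its unresolved callees, and emitting a callee decrements the counters of each caller waiting on it, finalizing any that reach zero. A toy model of that reverse-edge accounting, with hypothetical names.

    #include <cassert>
    #include <map>
    #include <string>
    #include <vector>

    static std::map<std::string, std::vector<std::string>> reverse_edges; // callee -> waiting callers
    static std::map<std::string, int> unresolved;                         // caller -> edges still missing

    // returns the callers that became complete (ready for finish_params)
    static std::vector<std::string> mark_emitted(const std::string &callee)
    {
        std::vector<std::string> now_complete;
        for (auto &caller : reverse_edges[callee])
            if (--unresolved[caller] == 0)
                now_complete.push_back(caller);
        reverse_edges.erase(callee);
        return now_complete;
    }

    int main()
    {
        unresolved["f"] = 2; // f referenced g and h before they were emitted
        reverse_edges["g"] = {"f"};
        reverse_edges["h"] = {"f"};
        assert(mark_emitted("g").empty());                          // one edge still pending
        assert(mark_emitted("h") == std::vector<std::string>{"f"}); // now complete
        return 0;
    }
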
- assert(addr); - isspecsig = true; + break; } - if (!decls.specFunctionObject.empty()) { - void *prev_specptr = NULL; - assert(NewDefs[nextaddr] == decls.specFunctionObject); - void *spec = (void*)Addrs[nextaddr++]; - assert(spec); - if (jl_atomic_cmpswap_acqrel(&this_code->specptr.fptr, &prev_specptr, spec)) { - // only set specsig and invoke if we were the first to set specptr - jl_atomic_store_relaxed(&this_code->specsigflags, (uint8_t) isspecsig); - // we might overwrite invokeptr here; that's ok, anybody who relied on the identity of invokeptr - // either assumes that specptr was null, doesn't care about specptr, - // or will wait until specsigflags has 0b10 set before reloading invoke - jl_atomic_store_release(&this_code->invoke, addr); - jl_atomic_store_release(&this_code->specsigflags, (uint8_t) (0b10 | isspecsig)); - } else { - //someone else beat us, don't commit any results - while (!(jl_atomic_load_acquire(&this_code->specsigflags) & 0b10)) { - jl_cpu_pause(); + } + codeinst = nullptr; + // barrier until all threads have finished calling addModule + if (--threads_in_compiler_phase == 0) { + // the last thread out will finish linking everything + // then release all of the other threads + // move the function pointers out from invokenames to the codeinst + + // batch compile job for all new functions + SmallVector NewDefs; + for (auto &this_code : linkready) { + auto it = invokenames.find(this_code); + assert(it != invokenames.end()); + jl_llvm_functions_t &decls = it->second; + assert(!decls.functionObject.empty()); + if (decls.functionObject != "jl_fptr_args" && + decls.functionObject != "jl_fptr_sparam" && + decls.functionObject != "jl_f_opaque_closure_call") + NewDefs.push_back(decls.functionObject); + if (!decls.specFunctionObject.empty()) + NewDefs.push_back(decls.specFunctionObject); + } + auto Addrs = jl_ExecutionEngine->findSymbols(NewDefs); + + size_t nextaddr = 0; + for (auto &this_code : linkready) { + auto it = invokenames.find(this_code); + assert(it != invokenames.end()); + jl_llvm_functions_t &decls = it->second; + jl_callptr_t addr; + bool isspecsig = false; + if (decls.functionObject == "jl_fptr_args") { + addr = jl_fptr_args_addr; + } + else if (decls.functionObject == "jl_fptr_sparam") { + addr = jl_fptr_sparam_addr; + } + else if (decls.functionObject == "jl_f_opaque_closure_call") { + addr = jl_f_opaque_closure_call_addr; + } + else { + assert(NewDefs[nextaddr] == decls.functionObject); + addr = (jl_callptr_t)Addrs[nextaddr++]; + assert(addr); + isspecsig = true; + } + if (!decls.specFunctionObject.empty()) { + void *prev_specptr = nullptr; + assert(NewDefs[nextaddr] == decls.specFunctionObject); + void *spec = (void*)Addrs[nextaddr++]; + assert(spec); + if (jl_atomic_cmpswap_acqrel(&this_code->specptr.fptr, &prev_specptr, spec)) { + // only set specsig and invoke if we were the first to set specptr + jl_atomic_store_relaxed(&this_code->specsigflags, (uint8_t) isspecsig); + // we might overwrite invokeptr here; that's ok, anybody who relied on the identity of invokeptr + // either assumes that specptr was null, doesn't care about specptr, + // or will wait until specsigflags has 0b10 set before reloading invoke + jl_atomic_store_release(&this_code->invoke, addr); + jl_atomic_store_release(&this_code->specsigflags, (uint8_t) (0b10 | isspecsig)); + } + else { + //someone else beat us, don't commit any results + while (!(jl_atomic_load_acquire(&this_code->specsigflags) & 0b10)) { + jl_cpu_pause(); + } + addr = jl_atomic_load_relaxed(&this_code->invoke); } - 
addr = jl_atomic_load_relaxed(&this_code->invoke); } - } else { - jl_callptr_t prev_invoke = NULL; - // Allow replacing addr if it is either NULL or our special waiting placeholder. - if (!jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) { - if (prev_invoke == jl_fptr_wait_for_compiled_addr && !jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) { - addr = prev_invoke; - //TODO do we want to potentially promote invoke anyways? (e.g. invoke is jl_interpret_call or some other - //known lesser function) + else { + jl_callptr_t prev_invoke = nullptr; + // Allow replacing addr if it is either nullptr or our special waiting placeholder. + if (!jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) { + if (prev_invoke == jl_fptr_wait_for_compiled_addr && !jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) { + addr = prev_invoke; + //TODO do we want to potentially promote invoke anyways? (e.g. invoke is jl_interpret_call or some other + //known lesser function) + } } } + invokenames.erase(it); + complete_graph.erase(this_code); } - if (this_code == codeinst) - fptr = addr; - i++; + linkready.clear(); + engine_wait.notify_all(); + } + else while (threads_in_compiler_phase) { + lock.wait(engine_wait); } - if (i > jl_timing_print_limit) - jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, "... <%d methods truncated>", i - 10); +} - uint64_t end_time = 0; - if (timed) - end_time = jl_hrtime(); - - // If logging of the compilation stream is enabled, - // then dump the method-instance specialization type to the stream - jl_method_instance_t *mi = codeinst->def; - if (jl_is_method(mi->def.method)) { - auto stream = *jl_ExecutionEngine->get_dump_compiles_stream(); - if (stream) { - ios_printf(stream, "%" PRIu64 "\t\"", end_time - start_time); - jl_static_show((JL_STREAM*)stream, mi->specTypes); - ios_printf(stream, "\"\n"); +static void jl_emit_codeinst_to_jit( + jl_code_instance_t *codeinst, + jl_code_info_t *src) +{ + { // lock scope + jl_unique_gcsafe_lock lock(engine_lock); + if (invokenames.count(codeinst) || jl_is_compiled_codeinst(codeinst)) + return; + } + JL_TIMING(CODEINST_COMPILE, CODEINST_COMPILE); + // emit the code in LLVM IR form to the new context + jl_codegen_params_t params(std::make_unique(), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context + params.cache = true; + params.imaging_mode = imaging_default(); + params.debug_level = jl_options.debug_level; + orc::ThreadSafeModule result_m = + jl_create_ts_module(name_from_method_instance(codeinst->def), params.tsctx, params.DL, params.TargetTriple); + jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params); // contains safepoints + if (!result_m) + return; + { // drop lock before acquiring engine_lock + auto release = std::move(params.tsctx_lock); + } + jl_unique_gcsafe_lock lock(engine_lock); + if (invokenames.count(codeinst) || jl_is_compiled_codeinst(codeinst)) + return; // destroy everything + invokenames[codeinst] = std::move(decls); + complete_emit(codeinst); + params.tsctx_lock = params.tsctx.getLock(); // re-acquire lock + int waiting = jl_analyze_workqueue(codeinst, params); + if (waiting) { + auto release = std::move(params.tsctx_lock); // unlock again before moving from it + incompletemodules.insert(std::pair(codeinst, std::tuple(std::move(params), waiting))); + } + else { + finish_params(result_m.getModuleUnlocked(), params); + } + emittedmodules[codeinst] = std::move(result_m); +} + +static void 
recursive_compile_graph( + jl_code_instance_t *codeinst, + jl_code_info_t *src) +{ + jl_emit_codeinst_to_jit(codeinst, src); + DenseSet Seen; + SmallVector workqueue; + workqueue.push_back(codeinst); + // if any edges were incomplete, try to complete them now + while (!workqueue.empty()) { + auto this_code = workqueue.pop_back_val(); + if (Seen.insert(this_code).second) { + if (this_code != codeinst) + jl_emit_codeinst_to_jit(this_code, nullptr); // contains safepoints + jl_unique_gcsafe_lock lock(engine_lock); + auto edges = complete_graph.find(this_code); + if (edges != complete_graph.end()) { + workqueue.append(edges->second); + } } } - return fptr; } +// this generates llvm code for the lambda info +// and adds the result to the jitlayers +// (and the shadow module), +// and generates code for it +static jl_callptr_t _jl_compile_codeinst( + jl_code_instance_t *codeinst, + jl_code_info_t *src) +{ + recursive_compile_graph(codeinst, src); + jl_compile_codeinst_now(codeinst); + return jl_atomic_load_acquire(&codeinst->invoke); +} + + const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysimg_handle, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t ¶ms); // compile a C-callable alias @@ -415,42 +727,40 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void * orc::ThreadSafeModule backing; if (into == NULL) { if (!pparams) { - ctx = jl_ExecutionEngine->acquireContext(); + ctx = jl_ExecutionEngine->makeContext(); } backing = jl_create_ts_module("cextern", pparams ? pparams->tsctx : ctx, pparams ? pparams->DL : jl_ExecutionEngine->getDataLayout(), pparams ? pparams->TargetTriple : jl_ExecutionEngine->getTargetTriple()); into = &backing; } - auto target_info = into->withModuleDo([&](Module &M) { - return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple())); - }); - jl_codegen_params_t params(into->getContext(), std::move(target_info.first), std::move(target_info.second)); - params.imaging_mode = imaging_default(); - params.debug_level = jl_options.debug_level; - if (pparams == NULL) - pparams = ¶ms; - assert(pparams->tsctx.getContext() == into->getContext().getContext()); - const char *name = jl_generate_ccallable(wrap(into), sysimg, declrt, sigt, *pparams); bool success = true; - if (!sysimg) { - JL_LOCK(&jl_ExecutionEngine->jitlock); - if (jl_ExecutionEngine->getGlobalValueAddress(name)) { - success = false; - } - if (success && p == NULL) { - jl_jit_globals(params.global_targets); - assert(params.workqueue.empty()); - if (params._shared_module) { - jl_ExecutionEngine->optimizeDLSyms(*params._shared_module); - jl_ExecutionEngine->addModule(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); + { + auto Lock = into->getContext().getLock(); + Module *M = into->getModuleUnlocked(); + jl_codegen_params_t params(into->getContext(), M->getDataLayout(), Triple(M->getTargetTriple())); + params.imaging_mode = imaging_default(); + params.debug_level = jl_options.debug_level; + if (pparams == NULL) + pparams = ¶ms; + assert(pparams->tsctx.getContext() == into->getContext().getContext()); + const char *name = jl_generate_ccallable(wrap(into), sysimg, declrt, sigt, *pparams); + if (!sysimg) { + jl_unique_gcsafe_lock lock(extern_c_lock); + if (jl_ExecutionEngine->getGlobalValueAddress(name)) { + success = false; + } + if (success && p == NULL) { + jl_jit_globals(params.global_targets); + assert(params.workqueue.empty()); + if (params._shared_module) { + jl_ExecutionEngine->optimizeDLSyms(*params._shared_module); // 
safepoint + jl_ExecutionEngine->addModule(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); + } + } + if (success && llvmmod == NULL) { + jl_ExecutionEngine->optimizeDLSyms(*M); // safepoint + jl_ExecutionEngine->addModule(std::move(*into)); } } - if (success && llvmmod == NULL) { - into->withModuleDo([&](Module &M) { - jl_ExecutionEngine->optimizeDLSyms(M); - }); - jl_ExecutionEngine->addModule(std::move(*into)); - } - JL_UNLOCK(&jl_ExecutionEngine->jitlock); // Might GC } if (timed) { if (measure_compile_time_enabled) { @@ -459,9 +769,6 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void * } ct->reentrant_timing &= ~1ull; } - if (ctx.getContext()) { - jl_ExecutionEngine->releaseContext(std::move(ctx)); - } return success; } @@ -512,18 +819,13 @@ extern "C" JL_DLLEXPORT_CODEGEN int jl_compile_codeinst_impl(jl_code_instance_t *ci) { int newly_compiled = 0; - if (jl_atomic_load_relaxed(&ci->invoke) != NULL) { - return newly_compiled; - } - JL_LOCK(&jl_ExecutionEngine->jitlock); if (jl_atomic_load_relaxed(&ci->invoke) == NULL) { ++SpecFPtrCount; uint64_t start = jl_typeinf_timing_begin(); - _jl_compile_codeinst(ci, NULL, *jl_ExecutionEngine->getContext()); + _jl_compile_codeinst(ci, NULL); jl_typeinf_timing_end(start, 0); newly_compiled = 1; } - JL_UNLOCK(&jl_ExecutionEngine->jitlock); // Might GC return newly_compiled; } @@ -541,38 +843,39 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec) uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); - JL_LOCK(&jl_ExecutionEngine->jitlock); - if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) { - jl_code_info_t *src = NULL; - JL_GC_PUSH1(&src); - jl_method_t *def = unspec->def->def.method; - if (jl_is_method(def)) { - src = (jl_code_info_t*)def->source; - if (src && (jl_value_t*)src != jl_nothing) - src = jl_uncompress_ir(def, NULL, (jl_value_t*)src); - } - else { - jl_method_instance_t *mi = unspec->def; - jl_code_instance_t *uninferred = jl_cached_uninferred( - jl_atomic_load_relaxed(&mi->cache), 1); - assert(uninferred); - src = (jl_code_info_t*)jl_atomic_load_relaxed(&uninferred->inferred); - assert(src); - } - if (src) { + jl_code_info_t *src = NULL; + JL_GC_PUSH1(&src); + jl_method_t *def = unspec->def->def.method; + if (jl_is_method(def)) { + src = (jl_code_info_t*)def->source; + if (src && (jl_value_t*)src != jl_nothing) + src = jl_uncompress_ir(def, NULL, (jl_value_t*)src); + } + else { + jl_method_instance_t *mi = unspec->def; + jl_code_instance_t *uninferred = jl_cached_uninferred( + jl_atomic_load_relaxed(&mi->cache), 1); + assert(uninferred); + src = (jl_code_info_t*)jl_atomic_load_relaxed(&uninferred->inferred); + assert(src); + } + if (src) { + // TODO: first prepare recursive_compile_graph(unspec, src) before taking this lock to avoid recursion? + JL_LOCK(&jitlock); // TODO: use a better lock + if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) { assert(jl_is_code_info(src)); ++UnspecFPtrCount; jl_debuginfo_t *debuginfo = src->debuginfo; jl_atomic_store_release(&unspec->debuginfo, debuginfo); // n.b. 
this assumes the field was previously NULL, which is not entirely true jl_gc_wb(unspec, debuginfo); - _jl_compile_codeinst(unspec, src, *jl_ExecutionEngine->getContext()); + _jl_compile_codeinst(unspec, src); } - jl_callptr_t null = nullptr; - // if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort - jl_atomic_cmpswap(&unspec->invoke, &null, jl_fptr_interpret_call_addr); - JL_GC_POP(); + JL_UNLOCK(&jitlock); // Might GC } - JL_UNLOCK(&jl_ExecutionEngine->jitlock); // Might GC + JL_GC_POP(); + jl_callptr_t null = nullptr; + // if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort + jl_atomic_cmpswap(&unspec->invoke, &null, jl_fptr_interpret_call_addr); if (timed) { if (measure_compile_time_enabled) { auto end = jl_hrtime(); @@ -634,8 +937,8 @@ static auto countBasicBlocks(const Function &F) JL_NOTSAFEPOINT static constexpr size_t N_optlevels = 4; -static Expected selectOptLevel(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) { - TSM.withModuleDo([](Module &M) { +static orc::ThreadSafeModule selectOptLevel(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { + TSM.withModuleDo([](Module &M) JL_NOTSAFEPOINT { size_t opt_level = std::max(static_cast(jl_options.opt_level), 0); do { if (jl_generating_output()) { @@ -661,7 +964,10 @@ static Expected selectOptLevel(orc::ThreadSafeModule TSM, opt_level = std::min(opt_level, N_optlevels - 1); M.addModuleFlag(Module::Warning, "julia.optlevel", opt_level); }); - return std::move(TSM); + return TSM; +} +static orc::ThreadSafeModule selectOptLevel(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + return selectOptLevel(std::move(TSM)); } void jl_register_jit_object(const object::ObjectFile &debugObj, @@ -699,8 +1005,8 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin { { std::lock_guard lock(PluginMutex); assert(PendingObjs.count(&MR) == 0); - PendingObjs[&MR] = std::unique_ptr( - new JITObjectInfo{std::move(NewBuffer), std::move(NewObj), {}}); + PendingObjs[&MR] = std::unique_ptr(new JITObjectInfo{ + std::move(NewBuffer), std::move(NewObj), {}}); } } @@ -870,7 +1176,7 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { // TODO: Port our memory management optimisations to JITLink instead of using the // default InProcessMemoryManager. 
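
Editor's note: the jl_atomic_cmpswap calls above publish unspec->invoke so that only the first writer wins, and the interpreter fallback is installed only if nothing was published first. A minimal sketch of that pattern, with std::atomic standing in for the jl_atomic_* wrappers.

    #include <atomic>
    #include <cassert>

    typedef void (*callptr_t)();
    static void compiled_entry() {}
    static void interpreter_entry() {}

    static std::atomic<callptr_t> invoke{nullptr};

    static void publish(callptr_t addr)
    {
        callptr_t expected = nullptr;
        // replace only if still unset; a losing writer keeps the winner's value
        invoke.compare_exchange_strong(expected, addr,
                                       std::memory_order_acq_rel,
                                       std::memory_order_acquire);
    }

    int main()
    {
        publish(compiled_entry);
        publish(interpreter_entry); // too late: compiled_entry already won
        assert(invoke.load(std::memory_order_acquire) == compiled_entry);
        return 0;
    }
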
-std::unique_ptr createJITLinkMemoryManager() { +std::unique_ptr createJITLinkMemoryManager() JL_NOTSAFEPOINT { #if JL_LLVM_VERSION < 160000 return cantFail(orc::MapperJITLinkMemoryManager::CreateWithMapper()); #else @@ -900,7 +1206,7 @@ class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar { } }; -RTDyldMemoryManager* createRTDyldMemoryManager(void); +RTDyldMemoryManager *createRTDyldMemoryManager(void) JL_NOTSAFEPOINT; // A simple forwarding class, since OrcJIT v2 needs a unique_ptr, while we have a shared_ptr class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { @@ -909,7 +1215,10 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { public: ForwardingMemoryManager(std::shared_ptr MemMgr) : MemMgr(MemMgr) {} - virtual ~ForwardingMemoryManager() = default; + ForwardingMemoryManager(ForwardingMemoryManager &) = delete; + virtual ~ForwardingMemoryManager() { + assert(!MemMgr); + } virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, StringRef SectionName) override { @@ -947,7 +1256,11 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { return MemMgr->deregisterEHFrames(); } virtual bool finalizeMemory(std::string *ErrMsg = nullptr) override { - return MemMgr->finalizeMemory(ErrMsg); + bool b = false; + if (MemMgr.use_count() == 2) + b = MemMgr->finalizeMemory(ErrMsg); + MemMgr.reset(); + return b; } virtual void notifyObjectLoaded(RuntimeDyld &RTDyld, const object::ObjectFile &Obj) override { @@ -955,10 +1268,10 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { } }; - -void registerRTDyldJITObject(const object::ObjectFile &Object, - const RuntimeDyld::LoadedObjectInfo &L, - const std::shared_ptr &MemMgr) +#ifndef JL_USE_JITLINK +static void registerRTDyldJITObject(orc::MaterializationResponsibility &MR, + const object::ObjectFile &Object, + const RuntimeDyld::LoadedObjectInfo &L) { StringMap loadedSections; for (const object::SectionRef &lSection : Object.sections()) { @@ -980,6 +1293,8 @@ void registerRTDyldJITObject(const object::ObjectFile &Object, auto DebugObject = L.getObjectForDebug(Object); // ELF requires us to make a copy to mutate the header with the section load addresses. On other platforms this is a no-op. jl_register_jit_object(DebugObject.getBinary() ? 
*DebugObject.getBinary() : Object, getLoadAddress); } +#endif + namespace { static std::unique_ptr createTargetMachine() JL_NOTSAFEPOINT { TargetOptions options = TargetOptions(); @@ -1078,9 +1393,6 @@ namespace { fixupTM(*TM); return std::unique_ptr(TM); } -} // namespace - -namespace { typedef NewPM PassManager; @@ -1131,14 +1443,14 @@ namespace { }; template - struct OptimizerT { - OptimizerT(TargetMachine &TM, SmallVector, 0> &printers, std::mutex &llvm_printing_mutex) JL_NOTSAFEPOINT { + struct sizedOptimizerT { + sizedOptimizerT(TargetMachine &TM, SmallVector, 0> &printers, std::mutex &llvm_printing_mutex) JL_NOTSAFEPOINT { for (size_t i = 0; i < N; i++) { PMs[i] = std::make_unique>>(PMCreator(TM, i, printers, llvm_printing_mutex)); } } - OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + orc::ThreadSafeModule operator()(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { auto PoolIdx = cast(cast(M.getModuleFlag("julia.optlevel"))->getValue())->getZExtValue(); assert(PoolIdx < N && "Invalid optimization pool index"); @@ -1243,12 +1555,23 @@ namespace { llvm_unreachable("optlevel is between 0 and 3!"); } }); - return Expected{std::move(TSM)}; + return TSM; } private: std::array>>, N> PMs; }; + // shim for converting a unique_ptr to a TransformFunction to a TransformFunction + template + struct IRTransformRef { + IRTransformRef(T &transform) : transform(transform) {} + OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + return transform(std::move(TSM), R); + } + private: + T &transform; + }; + template struct CompilerT : orc::IRCompileLayer::IRCompiler { @@ -1264,7 +1587,8 @@ namespace { size_t PoolIdx; if (auto opt_level = M.getModuleFlag("julia.optlevel")) { PoolIdx = cast(cast(opt_level)->getValue())->getZExtValue(); - } else { + } + else { PoolIdx = jl_options.opt_level; } assert(PoolIdx < N && "Invalid optimization level for compiler!"); @@ -1273,74 +1597,89 @@ namespace { std::array>>, N> TMs; }; +} - struct JITPointersT { - - JITPointersT(SharedBytesT &SharedBytes, std::mutex &Lock) JL_NOTSAFEPOINT - : SharedBytes(SharedBytes), Lock(Lock) {} +struct JuliaOJIT::OptimizerT { + OptimizerT(TargetMachine &TM, SmallVector, 0> &printers, std::mutex &llvm_printing_mutex) + : opt(TM, printers, llvm_printing_mutex) {} + orc::ThreadSafeModule operator()(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { + return opt(std::move(TSM)); + } + OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + return opt(std::move(TSM)); + } +private: + struct sizedOptimizerT opt; +}; - Expected operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { - TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { - std::lock_guard locked(Lock); - for (auto &GV : make_early_inc_range(M.globals())) { - if (auto *Shared = getSharedBytes(GV)) { - ++InternedGlobals; - GV.replaceAllUsesWith(Shared); - GV.eraseFromParent(); - } +struct JuliaOJIT::JITPointersT { + JITPointersT(SharedBytesT &SharedBytes, std::mutex &Lock) JL_NOTSAFEPOINT + : SharedBytes(SharedBytes), Lock(Lock) {} + + orc::ThreadSafeModule operator()(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { + TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { + std::lock_guard locked(Lock); + for (auto &GV : make_early_inc_range(M.globals())) { + if (auto *Shared = getSharedBytes(GV)) { + ++InternedGlobals; + 
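selectOptLevel records its decision as the `julia.optlevel` module flag, and sizedOptimizerT/CompilerT above read it back to pick a pass-manager pool. A self-contained sketch of that round trip against the LLVM Module API:

#include <llvm/IR/Constants.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Metadata.h>
#include <llvm/IR/Module.h>
using namespace llvm;

int main() {
    LLVMContext Ctx;
    Module M("demo", Ctx);
    // Record the chosen level the way selectOptLevel does...
    M.addModuleFlag(Module::Warning, "julia.optlevel", 2);
    // ...and recover it the way the optimizer/compiler layers do,
    // falling back to a default when the flag is absent.
    size_t PoolIdx = 0;
    if (auto *flag = M.getModuleFlag("julia.optlevel"))
        PoolIdx = cast<ConstantInt>(cast<ConstantAsMetadata>(flag)->getValue())->getZExtValue();
    return (int)PoolIdx; // 2
}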
GV.replaceAllUsesWith(Shared); + GV.eraseFromParent(); } + } - // Windows needs some inline asm to help - // build unwind tables, if they have any functions to decorate - if (!M.functions().empty()) - jl_decorate_module(M); - }); - return std::move(TSM); - } + // Windows needs some inline asm to help + // build unwind tables, if they have any functions to decorate + if (!M.functions().empty()) + jl_decorate_module(M); + }); + return TSM; + } + Expected operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + return operator()(std::move(TSM)); + } - private: - // optimize memory by turning long strings into memoized copies, instead of - // making a copy per object file of output. - // we memoize them using a StringSet with a custom-alignment allocator - // to ensure they are properly aligned - Constant *getSharedBytes(GlobalVariable &GV) JL_NOTSAFEPOINT { - // We could probably technically get away with - // interning even external linkage globals, - // as long as they have global unnamedaddr, - // but currently we shouldn't be emitting those - // except in imaging mode, and we don't want to - // do this optimization there. - if (GV.hasExternalLinkage() || !GV.hasGlobalUnnamedAddr()) { - return nullptr; - } - if (!GV.hasInitializer()) { - return nullptr; - } - if (!GV.isConstant()) { - return nullptr; - } - auto CDS = dyn_cast(GV.getInitializer()); - if (!CDS) { - return nullptr; - } - StringRef Data = CDS->getRawDataValues(); - if (Data.size() < 16) { - // Cutoff, since we don't want to intern small strings - return nullptr; - } - Align Required = GV.getAlign().valueOrOne(); - Align Preferred = MaxAlignedAlloc::alignment(Data.size()); - if (Required > Preferred) - return nullptr; - StringRef Interned = SharedBytes.insert(Data).first->getKey(); - assert(llvm::isAddrAligned(Preferred, Interned.data())); - return literal_static_pointer_val(Interned.data(), GV.getType()); +private: + // optimize memory by turning long strings into memoized copies, instead of + // making a copy per object file of output. + // we memoize them using a StringSet with a custom-alignment allocator + // to ensure they are properly aligned + Constant *getSharedBytes(GlobalVariable &GV) JL_NOTSAFEPOINT { + // We could probably technically get away with + // interning even external linkage globals, + // as long as they have global unnamedaddr, + // but currently we shouldn't be emitting those + // except in imaging mode, and we don't want to + // do this optimization there. 
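getSharedBytes (whose body continues below) interns constant byte data in a StringSet so each distinct string is stored once and every object file points at the shared copy. A sketch of the deduplication property it relies on; the real code additionally uses a custom-alignment allocator (MaxAlignedAlloc), omitted here:

#include <llvm/ADT/StringSet.h>
#include <cassert>
using namespace llvm;

int main() {
    StringSet<> SharedBytes;
    const char *data = "a long constant that would otherwise be copied per object file";
    // insert() deduplicates: the second call finds the existing entry,
    // and getKey() returns storage owned by the set, so the address is
    // stable for the set's lifetime.
    StringRef a = SharedBytes.insert(data).first->getKey();
    StringRef b = SharedBytes.insert(data).first->getKey();
    assert(a.data() == b.data());
    return 0;
}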
+ if (GV.hasExternalLinkage() || !GV.hasGlobalUnnamedAddr()) { + return nullptr; } + if (!GV.hasInitializer()) { + return nullptr; + } + if (!GV.isConstant()) { + return nullptr; + } + auto CDS = dyn_cast(GV.getInitializer()); + if (!CDS) { + return nullptr; + } + StringRef Data = CDS->getRawDataValues(); + if (Data.size() < 16) { + // Cutoff, since we don't want to intern small strings + return nullptr; + } + Align Required = GV.getAlign().valueOrOne(); + Align Preferred = MaxAlignedAlloc::alignment(Data.size()); + if (Required > Preferred) + return nullptr; + StringRef Interned = SharedBytes.insert(Data).first->getKey(); + assert(llvm::isAddrAligned(Preferred, Interned.data())); + return literal_static_pointer_val(Interned.data(), GV.getType()); + } - SharedBytesT &SharedBytes; - std::mutex &Lock; - }; -} + SharedBytesT &SharedBytes; + std::mutex &Lock; +}; struct JuliaOJIT::DLSymOptimizer { @@ -1362,20 +1701,24 @@ struct JuliaOJIT::DLSymOptimizer { #undef INIT_RUNTIME_LIBRARY } + ~DLSymOptimizer() JL_NOTSAFEPOINT = default; - void *lookup_symbol(void *libhandle, const char *fname) { + void *lookup_symbol(void *libhandle, const char *fname) JL_NOTSAFEPOINT { void *addr; jl_dlsym(libhandle, fname, &addr, 0); return addr; } - void *lookup(const char *libname, const char *fname) { + void *lookup(const char *libname, const char *fname) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER { StringRef lib(libname); StringRef f(fname); std::lock_guard lock(symbols_mutex); auto uit = user_symbols.find(lib); if (uit == user_symbols.end()) { + jl_task_t *ct = jl_current_task; + int8_t gc_state = jl_gc_unsafe_enter(ct->ptls); void *handle = jl_get_library_(libname, 0); + jl_gc_unsafe_leave(ct->ptls, gc_state); if (!handle) return nullptr; uit = user_symbols.insert(std::make_pair(lib, std::make_pair(handle, StringMap()))).first; @@ -1390,7 +1733,7 @@ struct JuliaOJIT::DLSymOptimizer { return handle; } - void *lookup(uintptr_t libidx, const char *fname) { + void *lookup(uintptr_t libidx, const char *fname) JL_NOTSAFEPOINT { std::lock_guard lock(symbols_mutex); runtime_symbols.resize(std::max(runtime_symbols.size(), libidx + 1)); auto it = runtime_symbols[libidx].second.find(fname); @@ -1402,7 +1745,7 @@ struct JuliaOJIT::DLSymOptimizer { return handle; } - void operator()(Module &M) { + void operator()(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER { for (auto &GV : M.globals()) { auto Name = GV.getName(); if (Name.starts_with("jlplt") && Name.ends_with("got")) { @@ -1518,7 +1861,7 @@ struct JuliaOJIT::DLSymOptimizer { bool named; }; -void optimizeDLSyms(Module &M) { +void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER { JuliaOJIT::DLSymOptimizer(true)(M); } @@ -1552,10 +1895,6 @@ llvm::DataLayout jl_create_datalayout(TargetMachine &TM) { return jl_data_layout; } -#ifdef _COMPILER_ASAN_ENABLED_ -int64_t ___asan_globals_registered; -#endif - JuliaOJIT::JuliaOJIT() : TM(createTargetMachine()), DL(jl_create_datalayout(*TM)), @@ -1564,34 +1903,27 @@ JuliaOJIT::JuliaOJIT() JD(ES.createBareJITDylib("JuliaOJIT")), ExternalJD(ES.createBareJITDylib("JuliaExternal")), DLSymOpt(std::make_unique(false)), - ContextPool([](){ - auto ctx = std::make_unique(); - #if JL_LLVM_VERSION < 170000 - SetOpaquePointer(*ctx); - #endif - return orc::ThreadSafeContext(std::move(ctx)); - }), #ifdef JL_USE_JITLINK MemMgr(createJITLinkMemoryManager()), ObjectLayer(ES, *MemMgr), - CompileLayer(ES, ObjectLayer, std::make_unique>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)), #else 
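The lookup() hunk above brackets jl_get_library_ with jl_gc_unsafe_enter/jl_gc_unsafe_leave by hand. A sketch of wrapping such a paired state transition in RAII so an early return cannot skip the leave; the enter/leave functions here are illustrative stand-ins, not Julia's:

#include <cstdio>

static int8_t gc_unsafe_enter(void) { std::puts("-> gc unsafe"); return 0; }
static void gc_unsafe_leave(int8_t state) { std::printf("<- restore %d\n", (int)state); }

// The destructor restores the saved state on every exit path.
struct gc_unsafe_region {
    int8_t state;
    gc_unsafe_region() : state(gc_unsafe_enter()) {}
    ~gc_unsafe_region() { gc_unsafe_leave(state); }
};

static void *get_library(bool exists) {
    gc_unsafe_region region;
    if (!exists)
        return nullptr;  // leave still runs here
    return (void *)1;    // dummy non-null handle
}

int main() {
    return get_library(false) == nullptr ? 0 : 1;
}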
MemMgr(createRTDyldMemoryManager()), - ObjectLayer( + UnlockedObjectLayer( ES, [this]() { std::unique_ptr result(new ForwardingMemoryManager(MemMgr)); return result; } ), - LockLayer(ObjectLayer), - CompileLayer(ES, LockLayer, std::make_unique>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)), + ObjectLayer(UnlockedObjectLayer), #endif - JITPointersLayer(ES, CompileLayer, orc::IRTransformLayer::TransformFunction(JITPointersT(SharedBytes, RLST_mutex))), - OptimizeLayer(ES, JITPointersLayer, orc::IRTransformLayer::TransformFunction(OptimizerT(*TM, PrintLLVMTimers, llvm_printing_mutex))), - OptSelLayer(ES, OptimizeLayer, orc::IRTransformLayer::TransformFunction(selectOptLevel)) + CompileLayer(ES, ObjectLayer, std::make_unique>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)), + JITPointers(std::make_unique(SharedBytes, RLST_mutex)), + JITPointersLayer(ES, CompileLayer, IRTransformRef(*JITPointers)), + Optimizers(std::make_unique(*TM, PrintLLVMTimers, llvm_printing_mutex)), + OptimizeLayer(ES, JITPointersLayer, IRTransformRef(*Optimizers)), + OptSelLayer(ES, OptimizeLayer, static_cast(selectOptLevel)) { - JL_MUTEX_INIT(&this->jitlock, "JuliaOJIT"); #ifdef JL_USE_JITLINK # if defined(LLVM_SHLIB) // When dynamically linking against LLVM, use our custom EH frame registration code @@ -1606,12 +1938,7 @@ JuliaOJIT::JuliaOJIT() ObjectLayer.addPlugin(std::make_unique()); ObjectLayer.addPlugin(std::make_unique(jit_bytes_size)); #else - ObjectLayer.setNotifyLoaded( - [this](orc::MaterializationResponsibility &MR, - const object::ObjectFile &Object, - const RuntimeDyld::LoadedObjectInfo &LO) { - registerRTDyldJITObject(Object, LO, MemMgr); - }); + UnlockedObjectLayer.setNotifyLoaded(registerRTDyldJITObject); #endif std::string ErrorStr; @@ -1741,19 +2068,34 @@ JuliaOJIT::JuliaOJIT() #endif cantFail(GlobalJD.define(orc::absoluteSymbols(msan_crt))); #endif +#if JL_LLVM_VERSION < 190000 #ifdef _COMPILER_ASAN_ENABLED_ + // this is a hack to work around a bad assertion: + // /workspace/srcdir/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp:3028: llvm::Error llvm::orc::ExecutionSession::OL_notifyResolved(llvm::orc::MaterializationResponsibility&, const SymbolMap&): Assertion `(KV.second.getFlags() & ~JITSymbolFlags::Common) == (I->second & ~JITSymbolFlags::Common) && "Resolving symbol with incorrect flags"' failed. 
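The constructor above passes IRTransformRef(*JITPointers) and IRTransformRef(*Optimizers) so the layers borrow the transforms while the JIT keeps ownership and addModule can also call them directly, outside the layer stack. A sketch of that reference-shim shape with a toy transform:

#include <cstdio>
#include <utility>

// The layer wants an owned, copyable callable; the shim is both, yet
// all copies share the one underlying transform.
template <typename T>
struct TransformRef {
    T &transform;
    template <typename Arg>
    auto operator()(Arg &&arg) { return transform(std::forward<Arg>(arg)); }
};

struct Optimizer {
    int runs = 0;
    int operator()(int x) { ++runs; return 2 * x; }
};

int main() {
    Optimizer opt;
    TransformRef<Optimizer> ref{opt};
    int result = ref(21);
    std::printf("%d after %d run(s)\n", result, opt.runs); // 42 after 1 run(s)
    return 0;
}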
+ // hopefully fixed upstream by e7698a13e319a9919af04d3d693a6f6ea7168a44 + static int64_t jl___asan_globals_registered; orc::SymbolMap asan_crt; #if JL_LLVM_VERSION >= 170000 - asan_crt[mangle("___asan_globals_registered")] = {ExecutorAddr::fromPtr(&___asan_globals_registered), JITSymbolFlags::Exported}; + asan_crt[mangle("___asan_globals_registered")] = {ExecutorAddr::fromPtr(&jl___asan_globals_registered), JITSymbolFlags::Common | JITSymbolFlags::Exported}; #else - asan_crt[mangle("___asan_globals_registered")] = JITEvaluatedSymbol::fromPointer(&___asan_globals_registered, JITSymbolFlags::Exported); + asan_crt[mangle("___asan_globals_registered")] = JITEvaluatedSymbol::fromPointer(&jl___asan_globals_registered, JITSymbolFlags::Common | JITSymbolFlags::Exported); #endif cantFail(JD.define(orc::absoluteSymbols(asan_crt))); #endif +#endif } JuliaOJIT::~JuliaOJIT() = default; +ThreadSafeContext JuliaOJIT::makeContext() +{ + auto ctx = std::make_unique(); + #if JL_LLVM_VERSION < 170000 + SetOpaquePointer(*ctx); + #endif + return orc::ThreadSafeContext(std::move(ctx)); +} + orc::SymbolStringPtr JuliaOJIT::mangle(StringRef Name) { std::string MangleName = getMangledName(Name); @@ -1773,40 +2115,32 @@ void JuliaOJIT::addModule(orc::ThreadSafeModule TSM) { JL_TIMING(LLVM_JIT, JIT_Total); ++ModulesAdded; -#ifndef JL_USE_JITLINK - orc::SymbolLookupSet NewExports; - TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { - for (auto &F : M.global_values()) { - if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { - auto Name = ES.intern(getMangledName(F.getName())); - NewExports.add(std::move(Name)); - } - } - assert(!verifyLLVMIR(M)); - }); -#endif - - auto Err = OptSelLayer.add(JD, std::move(TSM)); + TSM = selectOptLevel(std::move(TSM)); + TSM = (*Optimizers)(std::move(TSM)); + TSM = (*JITPointers)(std::move(TSM)); + auto Lock = TSM.getContext().getLock(); + Module &M = *TSM.getModuleUnlocked(); + // Treat this as if one of the passes might contain a safepoint + // even though that shouldn't be the case and might be unwise + Expected> Obj = CompileLayer.getCompiler()(M); + if (!Obj) { + ES.reportError(Obj.takeError()); + errs() << "Failed to add module to JIT!\n"; + errs() << "Dumping failing module\n" << M << "\n"; + return; + } + { auto release = std::move(Lock); } + auto Err = JuliaOJIT::addObjectFile(JD, std::move(*Obj)); if (Err) { ES.reportError(std::move(Err)); - errs() << "Failed to add module to JIT!\n"; + errs() << "Failed to add objectfile to JIT!\n"; abort(); } -#ifndef JL_USE_JITLINK - // force eager compilation (for now), due to memory management specifics - // (can't handle compilation recursion) - auto Lookups = ES.lookup({{&JD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly}}, NewExports); - if (!Lookups) { - ES.reportError(Lookups.takeError()); - errs() << "Failed to lookup symbols in module!\n"; - } -#endif } Error JuliaOJIT::addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, bool ShouldOptimize) { - if (auto Err = TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT -> Error - { + if (auto Err = TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT -> Error { if (M.getDataLayout().isDefault()) M.setDataLayout(DL); if (M.getDataLayout() != DL) @@ -1815,24 +2149,29 @@ Error JuliaOJIT::addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, M.getDataLayout().getStringRepresentation() + " (module) vs " + DL.getStringRepresentation() + " (jit)", inconvertibleErrorCode()); - + // OrcJIT requires that all modules / files have unique names: + 
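addModule above drops its context lock partway through with `{ auto release = std::move(Lock); }`. A sketch of why that one-liner unlocks: moving a std::unique_lock into a temporary that dies at the end of the statement block releases the mutex immediately, without wrapping the rest of the function in a scope.

#include <cstdio>
#include <mutex>

int main() {
    std::mutex m;
    std::unique_lock<std::mutex> Lock(m);
    std::puts("lock held");
    { auto release = std::move(Lock); } // temporary destructs: unlocked here
    std::puts("lock released");
    return Lock.owns_lock() ? 1 : 0;    // 0: Lock is now disengaged
}

The unique-naming requirement noted just above is met with a relaxed atomic counter (the setModuleIdentifier call continues below). A sketch with illustrative names; uniqueness is all that matters, so relaxed ordering suffices, as with jl_atomic_fetch_add_relaxed(&jitcounter, 1):

#include <atomic>
#include <cstdio>
#include <string>

static std::atomic<size_t> jitcounter{0};

static std::string unique_name(const char *prefix) {
    return std::string(prefix) + "-" +
           std::to_string(jitcounter.fetch_add(1, std::memory_order_relaxed));
}

int main() {
    std::printf("%s %s\n", unique_name("jitted").c_str(),
                unique_name("jitted").c_str()); // jitted-0 jitted-1
    return 0;
}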
M.setModuleIdentifier((M.getModuleIdentifier() + Twine("-") + Twine(jl_atomic_fetch_add_relaxed(&jitcounter, 1))).str()); return Error::success(); - })) + })) return Err; + //if (ShouldOptimize) + // return OptimizeLayer.add(JD, std::move(TSM)); return CompileLayer.add(JD.getDefaultResourceTracker(), std::move(TSM)); } Error JuliaOJIT::addObjectFile(orc::JITDylib &JD, std::unique_ptr Obj) { assert(Obj && "Can not add null object"); -#ifdef JL_USE_JITLINK + // OrcJIT requires that all modules / files have unique names: + // https://llvm.org/doxygen/namespacellvm_1_1orc.html#a1f5a1bc60c220cdccbab0f26b2a425e1 + // so we have to force a copy here + std::string Name = ("jitted-" + Twine(jl_atomic_fetch_add_relaxed(&jitcounter, 1))).str(); + Obj = Obj->getMemBufferCopy(Obj->getBuffer(), Name); return ObjectLayer.add(JD.getDefaultResourceTracker(), std::move(Obj)); -#else - return LockLayer.add(JD.getDefaultResourceTracker(), std::move(Obj)); -#endif } SmallVector JuliaOJIT::findSymbols(ArrayRef Names) { + // assert(MemMgr.use_count() == 1); (true single-threaded, but slightly race-y to assert it with concurrent threads) DenseMap Unmangled; orc::SymbolLookupSet Exports; for (StringRef Name : Names) { @@ -1978,6 +2317,7 @@ void JuliaOJIT::enableJITDebuggingSupport() addAbsoluteToMap(GDBFunctions,llvm_orc_registerJITLoaderGDBAllocAction); auto registerJITLoaderGDBWrapper = addAbsoluteToMap(GDBFunctions,llvm_orc_registerJITLoaderGDBWrapper); cantFail(JD.define(orc::absoluteSymbols(GDBFunctions))); + (void)registerJITLoaderGDBWrapper; if (TM->getTargetTriple().isOSBinFormatMachO()) ObjectLayer.addPlugin(cantFail(orc::GDBJITDebugInfoRegistrationPlugin::Create(ES, JD, TM->getTargetTriple()))); #ifndef _COMPILER_ASAN_ENABLED_ // TODO: Fix duplicated sections spam #51794 @@ -2013,12 +2353,12 @@ void JuliaOJIT::enableOProfileJITEventListener() void JuliaOJIT::enablePerfJITEventListener() { #if JL_LLVM_VERSION >= 180000 - orc::SymbolMap PerfFunctions; - auto StartAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfStart); - auto EndAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfEnd); - auto ImplAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfImpl); - cantFail(JD.define(orc::absoluteSymbols(PerfFunctions))); if (TM->getTargetTriple().isOSBinFormatELF()) { + orc::SymbolMap PerfFunctions; + auto StartAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfStart); + auto EndAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfEnd); + auto ImplAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfImpl); + cantFail(JD.define(orc::absoluteSymbols(PerfFunctions))); ObjectLayer.addPlugin(cantFail(DebugInfoPreservationPlugin::Create())); //ObjectLayer.addPlugin(cantFail(PerfSupportPlugin::Create( // ES.getExecutorProcessControl(), *JD, true, true))); @@ -2032,7 +2372,7 @@ void JuliaOJIT::enablePerfJITEventListener() void JuliaOJIT::RegisterJITEventListener(JITEventListener *L) { if (L) - ObjectLayer.registerJITEventListener(*L); + UnlockedObjectLayer.registerJITEventListener(*L); } void JuliaOJIT::enableJITDebuggingSupport() { @@ -2071,7 +2411,7 @@ std::string JuliaOJIT::getMangledName(const GlobalValue *GV) size_t JuliaOJIT::getTotalBytes() const { - auto bytes = jit_bytes_size.load(std::memory_order_relaxed); + auto bytes = jl_atomic_load_relaxed(&jit_bytes_size); #ifndef JL_USE_JITLINK size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT; bytes += 
getRTDyldMemoryManagerTotalBytes(MemMgr.get()); @@ -2081,7 +2421,7 @@ size_t JuliaOJIT::getTotalBytes() const void JuliaOJIT::addBytes(size_t bytes) { - jit_bytes_size.fetch_add(bytes, std::memory_order_relaxed); + jl_atomic_fetch_add_relaxed(&jit_bytes_size, bytes); } void JuliaOJIT::printTimers() @@ -2326,74 +2666,6 @@ static void jl_decorate_module(Module &M) { #undef ASM_USES_ELF } -#ifndef JL_USE_JITLINK -// Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable -static int jl_add_to_ee( - orc::ThreadSafeModule &M, - const StringMap &NewExports, - DenseMap &Queued, - SmallVectorImpl &Stack) -{ - // First check if the TSM is empty (already compiled) - if (!M) - return 0; - // Next check and record if it is on the stack somewhere - { - auto &Id = Queued[&M]; - if (Id) - return Id; - Stack.push_back(&M); - Id = Stack.size(); - } - // Finally work out the SCC - int depth = Stack.size(); - int MergeUp = depth; - SmallVector Children; - M.withModuleDo([&](Module &m) JL_NOTSAFEPOINT { - for (auto &F : m.global_objects()) { - if (F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { - auto Callee = NewExports.find(F.getName()); - if (Callee != NewExports.end()) { - auto *CM = Callee->second; - if (*CM && CM != &M) { - auto Down = Queued.find(CM); - if (Down != Queued.end()) - MergeUp = std::min(MergeUp, Down->second); - else - Children.push_back(CM); - } - } - } - } - }); - assert(MergeUp > 0); - for (auto *CM : Children) { - int Down = jl_add_to_ee(*CM, NewExports, Queued, Stack); - assert(Down <= (int)Stack.size()); - if (Down) - MergeUp = std::min(MergeUp, Down); - } - if (MergeUp < depth) - return MergeUp; - while (1) { - // Not in a cycle (or at the top of it) - // remove SCC state and merge every CM from the cycle into M - orc::ThreadSafeModule *CM = Stack.back(); - auto it = Queued.find(CM); - assert(it->second == (int)Stack.size()); - Queued.erase(it); - Stack.pop_back(); - if ((int)Stack.size() < depth) { - assert(&M == CM); - break; - } - jl_merge_module(M, std::move(*CM)); - } - jl_ExecutionEngine->addModule(std::move(M)); - return 0; -} -#endif - // helper function for adding a DLLImport (dlsym) address to the execution engine void add_named_global(StringRef name, void *addr) { diff --git a/src/jitlayers.h b/src/jitlayers.h index f4b9a6ea5395a..ba4ac3081795e 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -69,7 +69,6 @@ using namespace llvm; extern "C" jl_cgparams_t jl_default_cgparams; -extern arraylist_t new_invokes; DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeContext, LLVMOrcThreadSafeContextRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeModule, LLVMOrcThreadSafeModuleRef) @@ -154,11 +153,11 @@ struct jl_locked_stream { std::unique_lock lck; ios_t *&stream; - lock(std::mutex &mutex, ios_t *&stream) JL_NOTSAFEPOINT + lock(std::mutex &mutex, ios_t *&stream) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER : lck(mutex), stream(stream) {} lock(lock&) = delete; lock(lock&&) JL_NOTSAFEPOINT = default; - ~lock() JL_NOTSAFEPOINT = default; + ~lock() JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT = default; ios_t *&operator*() JL_NOTSAFEPOINT { return stream; @@ -177,8 +176,8 @@ struct jl_locked_stream { } }; - jl_locked_stream() JL_NOTSAFEPOINT = default; - ~jl_locked_stream() JL_NOTSAFEPOINT = default; + jl_locked_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER = default; + ~jl_locked_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default; lock operator*() JL_NOTSAFEPOINT { return lock(mutex, stream); @@ -210,12 
+209,12 @@ struct jl_codegen_call_target_t { jl_returninfo_t::CallingConv cc; unsigned return_roots; llvm::Function *decl; + llvm::Function *oc; bool specsig; }; typedef SmallVector, 0> jl_workqueue_t; -// TODO DenseMap? -typedef std::map> jl_compiled_functions_t; + typedef std::list> CallFrames; struct jl_codegen_params_t { orc::ThreadSafeContext tsctx; @@ -229,7 +228,6 @@ struct jl_codegen_params_t { typedef StringMap SymMapGV; // outputs jl_workqueue_t workqueue; - jl_compiled_functions_t compiled_functions; std::map global_targets; std::map, GlobalVariable*> external_fns; std::map ditypes; @@ -292,13 +290,20 @@ enum CompilationPolicy { Extern = 1, }; -void jl_compile_workqueue( - jl_codegen_params_t ¶ms, - CompilationPolicy policy); - Function *jl_cfunction_object(jl_function_t *f, jl_value_t *rt, jl_tupletype_t *argt, jl_codegen_params_t ¶ms); +Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, Module *M, jl_codegen_params_t ¶ms) JL_NOTSAFEPOINT; +void emit_specsig_to_fptr1( + Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots, + jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure, + size_t nargs, + jl_codegen_params_t ¶ms, + Function *target, + size_t min_world, size_t max_world) JL_NOTSAFEPOINT; +Function *get_or_emit_fptr1(StringRef Name, Module *M) JL_NOTSAFEPOINT; +void jl_init_function(Function *F, const Triple &TT) JL_NOTSAFEPOINT; + void add_named_global(StringRef name, void *addr) JL_NOTSAFEPOINT; static inline Constant *literal_static_pointer_val(const void *p, Type *T) JL_NOTSAFEPOINT @@ -371,6 +376,11 @@ using OptimizerResultT = Expected; using SharedBytesT = StringSet::MapEntryTy)>>; class JuliaOJIT { +private: + // any verification the user wants to do when adding an OwningResource to the pool + template + static void verifyResource(AnyT &resource) JL_NOTSAFEPOINT { } + static void verifyResource(orc::ThreadSafeContext &context) JL_NOTSAFEPOINT { assert(context.getContext()); } public: #ifdef JL_USE_JITLINK typedef orc::ObjectLinkingLayer ObjLayerT; @@ -385,13 +395,13 @@ class JuliaOJIT { std::unique_ptr O) override { JL_TIMING(LLVM_JIT, JIT_Link); #ifndef JL_USE_JITLINK - std::lock_guard lock(EmissionMutex); + std::lock_guard lock(EmissionMutex); #endif BaseLayer.emit(std::move(R), std::move(O)); } private: orc::ObjectLayer &BaseLayer; - std::mutex EmissionMutex; + std::recursive_mutex EmissionMutex; }; #endif typedef orc::IRCompileLayer CompileLayerT; @@ -420,11 +430,16 @@ class JuliaOJIT { : pool(pool), resource(std::move(resource)) {} OwningResource(const OwningResource &) = delete; OwningResource &operator=(const OwningResource &) = delete; - OwningResource(OwningResource &&) JL_NOTSAFEPOINT = default; + OwningResource(OwningResource &&other) JL_NOTSAFEPOINT + : pool(other.pool), resource(std::move(other.resource)) { + other.resource.reset(); + } OwningResource &operator=(OwningResource &&) JL_NOTSAFEPOINT = default; ~OwningResource() JL_NOTSAFEPOINT { // _LEAVE - if (resource) + if (resource) { + verifyResource(*resource); pool.release(std::move(*resource)); + } } ResourceT release() JL_NOTSAFEPOINT { ResourceT res(std::move(*resource)); @@ -510,7 +525,11 @@ class JuliaOJIT { std::unique_ptr mutex; }; + typedef ResourcePool> ContextPoolT; + struct DLSymOptimizer; + struct OptimizerT; + struct JITPointersT; #ifndef JL_USE_JITLINK void RegisterJITEventListener(JITEventListener *L) JL_NOTSAFEPOINT; @@ -528,7 +547,7 @@ class JuliaOJIT { orc::SymbolStringPtr mangle(StringRef Name) JL_NOTSAFEPOINT; 
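The OwningResource move constructor above gained an explicit `other.resource.reset()`. A sketch of the std::optional pitfall it guards against: moving an optional leaves the source engaged (holding a moved-from value), so a destructor gated on `if (resource)` would hand the resource back to the pool twice.

#include <cassert>
#include <optional>
#include <string>

struct OwningHandle {
    std::optional<std::string> resource;
    explicit OwningHandle(std::string s) : resource(std::move(s)) {}
    OwningHandle(OwningHandle &&other) : resource(std::move(other.resource)) {
        other.resource.reset(); // explicitly disengage the source, as in the diff
    }
    ~OwningHandle() { /* if (resource) pool.release(std::move(*resource)); */ }
};

int main() {
    OwningHandle a("ctx");
    OwningHandle b(std::move(a));
    assert(!a.resource && b.resource); // a defaulted move would leave 'a' engaged
    return 0;
}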
void addGlobalMapping(StringRef Name, uint64_t Addr) JL_NOTSAFEPOINT; - void addModule(orc::ThreadSafeModule M) JL_NOTSAFEPOINT; + void addModule(orc::ThreadSafeModule M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER; //Methods for the C API Error addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, @@ -552,15 +571,7 @@ class JuliaOJIT { uint64_t getGlobalValueAddress(StringRef Name) JL_NOTSAFEPOINT; uint64_t getFunctionAddress(StringRef Name) JL_NOTSAFEPOINT; StringRef getFunctionAtAddress(uint64_t Addr, jl_callptr_t invoke, jl_code_instance_t *codeinst) JL_NOTSAFEPOINT; - auto getContext() JL_NOTSAFEPOINT { - return *ContextPool; - } - orc::ThreadSafeContext acquireContext() { // JL_NOTSAFEPOINT_ENTER? - return ContextPool.acquire(); - } - void releaseContext(orc::ThreadSafeContext &&ctx) { // JL_NOTSAFEPOINT_LEAVE? - ContextPool.release(std::move(ctx)); - } + orc::ThreadSafeContext makeContext() JL_NOTSAFEPOINT; const DataLayout& getDataLayout() const JL_NOTSAFEPOINT; // TargetMachine pass-through methods @@ -576,22 +587,21 @@ class JuliaOJIT { void addBytes(size_t bytes) JL_NOTSAFEPOINT; void printTimers() JL_NOTSAFEPOINT; - jl_locked_stream &get_dump_emitted_mi_name_stream() JL_NOTSAFEPOINT { + jl_locked_stream &get_dump_emitted_mi_name_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return dump_emitted_mi_name_stream; } - jl_locked_stream &get_dump_compiles_stream() JL_NOTSAFEPOINT { + jl_locked_stream &get_dump_compiles_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return dump_compiles_stream; } - jl_locked_stream &get_dump_llvm_opt_stream() JL_NOTSAFEPOINT { + jl_locked_stream &get_dump_llvm_opt_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return dump_llvm_opt_stream; } std::string getMangledName(StringRef Name) JL_NOTSAFEPOINT; std::string getMangledName(const GlobalValue *GV) JL_NOTSAFEPOINT; - // Note that this is a safepoint due to jl_get_library_ and jl_dlsym calls - void optimizeDLSyms(Module &M); - - jl_mutex_t jitlock; + // Note that this is a potential safepoint due to jl_get_library_ and jl_dlsym calls + // but may be called from inside safe-regions due to jit compilation locks + void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER; private: @@ -618,20 +628,20 @@ class JuliaOJIT { std::mutex llvm_printing_mutex{}; SmallVector, 0> PrintLLVMTimers; - ResourcePool> ContextPool; - - std::atomic jit_bytes_size{0}; -#ifndef JL_USE_JITLINK - const std::shared_ptr MemMgr; -#else + _Atomic(size_t) jit_bytes_size{0}; + _Atomic(size_t) jitcounter{0}; +#ifdef JL_USE_JITLINK const std::unique_ptr MemMgr; -#endif ObjLayerT ObjectLayer; -#ifndef JL_USE_JITLINK - LockLayerT LockLayer; +#else + const std::shared_ptr MemMgr; // shared_ptr protected by LockLayerT.EmissionMutex + ObjLayerT UnlockedObjectLayer; + LockLayerT ObjectLayer; #endif CompileLayerT CompileLayer; + std::unique_ptr JITPointers; JITPointersLayerT JITPointersLayer; + std::unique_ptr Optimizers; OptimizeLayerT OptimizeLayer; OptSelLayerT OptSelLayer; }; diff --git a/src/julia.h b/src/julia.h index 7bb5f31eda708..168ba0deff1ec 100644 --- a/src/julia.h +++ b/src/julia.h @@ -426,8 +426,8 @@ typedef struct _jl_opaque_closure_t { jl_value_t *captures; size_t world; jl_method_t *source; - jl_fptr_args_t invoke; - void *specptr; + jl_fptr_args_t invoke; // n.b. despite the similar name, this is not an invoke ABI (jl_call_t / julia.call2), but rather the fptr1 (jl_fptr_args_t / julia.call) ABI + void *specptr; // n.b. 
despite the similarity in field name, this is not arbitrary private data for jlcall, but rather the codegen ABI for specsig, and is mandatory if specsig is valid
 } jl_opaque_closure_t;
 
 // This type represents an executable operation
@@ -475,7 +475,7 @@ typedef struct _jl_code_instance_t {
     // & 0b100 == From image
     _Atomic(uint8_t) precompile;  // if set, this will be added to the output system image
     uint8_t relocatability; // nonzero if all roots are built into sysimg or tagged by module key
-    _Atomic(jl_callptr_t) invoke; // jlcall entry point
+    _Atomic(jl_callptr_t) invoke; // usually the jlcall entry point, but if this codeinst belongs to an OC Method it is a jl_fptr_args_t fptr1 instead, unless it holds one of the special token objects rather than a function pointer
     union _jl_generic_specptr_t {
         _Atomic(void*) fptr;
         _Atomic(jl_fptr_args_t) fptr1;
@@ -2339,7 +2339,13 @@ JL_DLLEXPORT JL_CONST_FUNC jl_gcframe_t **(jl_get_pgcstack)(void) JL_GLOBALLY_RO
 extern JL_DLLIMPORT int jl_task_gcstack_offset;
 extern JL_DLLIMPORT int jl_task_ptls_offset;
 
+#ifdef __cplusplus
+}
+#endif
 #include "julia_locks.h" // requires jl_task_t definition
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 // Return the exception currently being handled, or `jl_nothing`.
 //
diff --git a/src/julia_atomics.h b/src/julia_atomics.h
index c094afcc54cd5..d05f0fafab28f 100644
--- a/src/julia_atomics.h
+++ b/src/julia_atomics.h
@@ -103,12 +103,12 @@ enum jl_memory_order {
 // this wrong thus we include the correct definitions here (with implicit
 // conversion), instead of using the macro version
 template<class T>
-T jl_atomic_load(std::atomic<T> *ptr)
+T jl_atomic_load(const std::atomic<T> *ptr)
 {
     return std::atomic_load(ptr);
 }
 template<class T>
-T jl_atomic_load_explicit(std::atomic<T> *ptr, std::memory_order order)
+T jl_atomic_load_explicit(const std::atomic<T> *ptr, std::memory_order order)
 {
     return std::atomic_load_explicit(ptr, order);
 }
diff --git a/src/julia_internal.h b/src/julia_internal.h
index 82c91c6d073af..bb8169c6e5f9e 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -1715,13 +1715,14 @@ JL_DLLEXPORT int jl_tupletype_length_compat(jl_value_t *v, size_t nargs) JL_NOTS
 JL_DLLEXPORT jl_value_t *jl_argtype_with_function(jl_value_t *f, jl_value_t *types0);
 JL_DLLEXPORT jl_value_t *jl_argtype_with_function_type(jl_value_t *ft JL_MAYBE_UNROOTED, jl_value_t *types0);
+JL_DLLEXPORT jl_value_t *jl_argtype_without_function(jl_value_t *ftypes);
 JL_DLLEXPORT unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *field_type);
 
-void register_eh_frames(uint8_t *Addr, size_t Size);
-void deregister_eh_frames(uint8_t *Addr, size_t Size);
+void register_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT;
+void deregister_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT;
 
-STATIC_INLINE void *jl_get_frame_addr(void)
+STATIC_INLINE void *jl_get_frame_addr(void) JL_NOTSAFEPOINT
 {
 #ifdef __GNUC__
     return __builtin_frame_address(0);
diff --git a/src/julia_locks.h b/src/julia_locks.h
index 5774ddada60c6..4d1345177f965 100644
--- a/src/julia_locks.h
+++ b/src/julia_locks.h
@@ -103,6 +103,33 @@ JL_DLLEXPORT void jl_unlock_field(jl_mutex_t *v) JL_NOTSAFEPOINT;
 
 #ifdef __cplusplus
 }
+
+#include <condition_variable>
+#include <mutex>
+// simple C++ shim around a std::unique_lock + gc-safe + disabled finalizers region,
+// since we nearly always want that combination together
+class jl_unique_gcsafe_lock {
+public:
+    int8_t gc_state;
+    std::unique_lock<std::mutex> native;
+    explicit jl_unique_gcsafe_lock(std::mutex &native) JL_NOTSAFEPOINT_ENTER
+    {
+        jl_task_t *ct = jl_current_task;
+        gc_state = jl_gc_safe_enter(ct->ptls);
+        this->native = std::unique_lock<std::mutex>(native);
+        ct->ptls->engine_nqueued++; // disables finalizers until inference is finished on this method graph
+    }
+    jl_unique_gcsafe_lock(jl_unique_gcsafe_lock &&native) = delete;
+    jl_unique_gcsafe_lock(jl_unique_gcsafe_lock &native) = delete;
+    ~jl_unique_gcsafe_lock() JL_NOTSAFEPOINT_LEAVE {
+        jl_task_t *ct = jl_current_task;
+        jl_gc_safe_leave(ct->ptls, gc_state);
+        ct->ptls->engine_nqueued--; // enable finalizers (but don't run them until the next gc)
+    }
+    void wait(std::condition_variable& cond) JL_NOTSAFEPOINT {
+        cond.wait(native);
+    }
+};
 #endif
 
 #endif
diff --git a/src/opaque_closure.c b/src/opaque_closure.c
index 0bf3a729cbcb1..9fe36f32d2030 100644
--- a/src/opaque_closure.c
+++ b/src/opaque_closure.c
@@ -80,14 +80,16 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t
     if (!jl_subtype(rt_lb, selected_rt)) {
         // TODO: It would be better to try to get a specialization with the
         // correct rt check here (or we could codegen a wrapper).
-        specptr = NULL; invoke = (jl_fptr_args_t)jl_interpret_opaque_closure;
+        specptr = NULL; // this will force codegen of the unspecialized version
+        invoke = (jl_fptr_args_t)jl_interpret_opaque_closure;
         jl_value_t *ts[2] = {rt_lb, (jl_value_t*)ci->rettype};
         selected_rt = jl_type_union(ts, 2);
     }
     if (!jl_subtype(ci->rettype, rt_ub)) {
         // TODO: It would be better to try to get a specialization with the
         // correct rt check here (or we could codegen a wrapper).
-        specptr = NULL; invoke = (jl_fptr_args_t)jl_interpret_opaque_closure;
+        specptr = NULL; // this will force codegen of the unspecialized version
+        invoke = (jl_fptr_args_t)jl_interpret_opaque_closure;
         selected_rt = jl_type_intersection(rt_ub, selected_rt);
     }
 
@@ -108,8 +110,7 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t
     jl_value_t *oc_type JL_ALWAYS_LEAFTYPE = jl_apply_type2((jl_value_t*)jl_opaque_closure_type, (jl_value_t*)argt, selected_rt);
     JL_GC_PROMISE_ROOTED(oc_type);
-    if (!specptr) {
-        sigtype = jl_argtype_with_function_type((jl_value_t*)oc_type, (jl_value_t*)argt);
+    if (specptr == NULL) {
         jl_method_instance_t *mi_generic = jl_specializations_get_linfo(jl_opaque_closure_method, sigtype, jl_emptysvec);
 
         // OC wrapper methods are not world dependent
@@ -197,7 +198,7 @@ int jl_tupletype_length_compat(jl_value_t *v, size_t nargs)
 
 JL_CALLABLE(jl_f_opaque_closure_call)
 {
-    jl_opaque_closure_t* oc = (jl_opaque_closure_t*)F;
+    jl_opaque_closure_t *oc = (jl_opaque_closure_t*)F;
     jl_value_t *argt = jl_tparam0(jl_typeof(oc));
     if (!jl_tupletype_length_compat(argt, nargs))
         jl_method_error(F, args, nargs + 1, oc->world);
diff --git a/src/pipeline.cpp b/src/pipeline.cpp
index 09d51598ea8b7..f8935070bb001 100644
--- a/src/pipeline.cpp
+++ b/src/pipeline.cpp
@@ -980,3 +980,9 @@ extern "C" JL_DLLEXPORT_CODEGEN
 ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() JL_NOTSAFEPOINT {
     return {LLVM_PLUGIN_API_VERSION, "Julia", "1", registerCallbacks};
 }
+
+void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis)
+{
+    PM->add(new TargetLibraryInfoWrapperPass(triple));
+    PM->add(createTargetTransformInfoWrapperPass(std::move(analysis)));
+}
diff --git a/src/stackwalk.c b/src/stackwalk.c
index 6784e601bcfba..7c6f946fe73c5 100644
--- a/src/stackwalk.c
+++ b/src/stackwalk.c
@@ -642,13 +642,13 @@ void jl_print_native_codeloc(uintptr_t ip) JL_NOTSAFEPOINT
     for (i = 0; i < n; i++) {
         jl_frame_t frame = frames[i];
         if (!frame.func_name) {
-            jl_safe_printf("unknown function (ip: %p)\n", (void*)ip);
+            jl_safe_printf("unknown function (ip: %p) at %s\n", (void*)ip, frame.file_name ? frame.file_name : "(unknown file)");
         }
         else {
             jl_safe_print_codeloc(frame.func_name, frame.file_name, frame.line, frame.inlined);
             free(frame.func_name);
-            free(frame.file_name);
         }
+        free(frame.file_name);
     }
     free(frames);
 }
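The julia_atomics.h hunk above adds const to the load wrappers; std::atomic_load_explicit has accepted a pointer-to-const atomic since C++11, so the wrappers simply forward. A sketch:

#include <atomic>

template <typename T>
T load_relaxed(const std::atomic<T> *ptr) {
    return std::atomic_load_explicit(ptr, std::memory_order_relaxed);
}

int main() {
    std::atomic<int> counter{42};
    const std::atomic<int> *view = &counter; // read-only alias now accepted
    return load_relaxed(view) == 42 ? 0 : 1;
}

And a sketch of how a jl_unique_gcsafe_lock-style wrapper is used with a condition variable, minus the GC-state and finalizer bookkeeping; scoped_wait_lock is an illustrative name, not Julia's:

#include <condition_variable>
#include <mutex>
#include <thread>

// An owned unique_lock plus a wait() helper, so callers never touch the
// mutex directly, matching the shim's wait(cond) member above.
struct scoped_wait_lock {
    std::unique_lock<std::mutex> native;
    explicit scoped_wait_lock(std::mutex &m) : native(m) {}
    void wait(std::condition_variable &cond) { cond.wait(native); }
};

static std::mutex m;
static std::condition_variable cv;
static bool ready = false;

int main() {
    std::thread t([] {
        { std::lock_guard<std::mutex> g(m); ready = true; }
        cv.notify_one();
    });
    scoped_wait_lock lk(m);
    while (!ready)
        lk.wait(cv); // releases 'native' while blocked, reacquires on wake
    t.join();
    return 0;
}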