diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index e5eb0e44c853f..c3760a50f44a2 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2496,7 +2496,7 @@ class OpenMPIRBuilder {
                      TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID,
                      Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP,
                      const SmallVector<DependData> &Dependencies,
-                     bool HasNoWait);
+                     const TargetDataRTArgs &RTArgs, bool HasNoWait);
 
   /// Emit the arguments to be passed to the runtime library based on the
   /// arrays of base pointers, pointers, sizes, map types, and mappers. If
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index ca3d8438654dc..c1f02b2b240de 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -6703,7 +6703,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
                                              /*TargetTaskAllocaIP=*/{}));
     else
       cantFail(emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP,
-                              /*Dependencies=*/{}, Info.HasNoWait));
+                              /*Dependencies=*/{}, RTArgs, Info.HasNoWait));
   } else {
     Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr(
         omp::OMPRTL___tgt_target_data_begin_mapper);
@@ -7150,15 +7150,55 @@ static Expected<Function *> createOutlinedFunction(
                                                       ValueReplacementMap);
   return Func;
 }
+/// Given a task descriptor, TaskWithPrivates, return the pointer to the block
+/// of pointers containing shared data between the parent task and the created
+/// task.
+static LoadInst *loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder,
+                                                  IRBuilderBase &Builder,
+                                                  Value *TaskWithPrivates,
+                                                  Type *TaskWithPrivatesTy) {
+  Type *TaskTy = OMPIRBuilder.Task;
+  LLVMContext &Ctx = Builder.getContext();
+  Value *TaskT =
+      Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
+  Value *Shareds = TaskT;
+  // TaskWithPrivatesTy can be one of the following:
+  // 1. %struct.task_with_privates = type { %struct.kmp_task_ompbuilder_t,
+  //                                        %struct.privates }
+  // 2. %struct.kmp_task_ompbuilder_t ;; This is simply TaskTy
+  //
+  // In the former case, that is when TaskWithPrivatesTy != TaskTy,
+  // its first member has to be the task descriptor. TaskTy is the type of the
+  // task descriptor. TaskT is the pointer to the task descriptor. Loading the
+  // first member of TaskT gives us the pointer to shared data.
+  if (TaskWithPrivatesTy != TaskTy)
+    Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
+  return Builder.CreateLoad(PointerType::getUnqual(Ctx), Shareds);
+}
 /// Create an entry point for a target task with the following.
 /// It'll have the following signature
 ///   void @.omp_target_task_proxy_func(i32 %thread.id, ptr %task)
 /// This function is called from emitTargetTask once the
 /// code to launch the target kernel has been outlined already.
-static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder,
-                                             IRBuilderBase &Builder,
-                                             CallInst *StaleCI) {
+/// NumOffloadingArrays is the number of offloading arrays that we need to copy
+/// into the task structure so that the deferred target task can access this
+/// data even after the stack frame of the generating task has been rolled
+/// back. Offloading arrays contain base pointers, pointers, sizes, etc.
+/// of the data that the target kernel will access. These in effect are the
+/// non-empty arrays of pointers held by OpenMPIRBuilder::TargetDataRTArgs.
+static Function *emitTargetTaskProxyFunction( + OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, + StructType *PrivatesTy, StructType *TaskWithPrivatesTy, + const size_t NumOffloadingArrays, const int SharedArgsOperandNo) { + + // If NumOffloadingArrays is non-zero, PrivatesTy better not be nullptr. + // This is because PrivatesTy is the type of the structure in which + // we pass the offloading arrays to the deferred target task. + assert((!NumOffloadingArrays || PrivatesTy) && + "PrivatesTy cannot be nullptr when there are offloadingArrays" + "to privatize"); + Module &M = OMPBuilder.M; // KernelLaunchFunction is the target launch function, i.e. // the function that sets up kernel arguments and calls @@ -7185,34 +7225,48 @@ static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, // call void @_QQmain..omp_par.1(i32 %global.tid.val6) OpenMPIRBuilder::InsertPointTy IP(StaleCI->getParent(), StaleCI->getIterator()); + LLVMContext &Ctx = StaleCI->getParent()->getContext(); + Type *ThreadIDTy = Type::getInt32Ty(Ctx); Type *TaskPtrTy = OMPBuilder.TaskPtr; Type *TaskTy = OMPBuilder.Task; + auto ProxyFnTy = FunctionType::get(Builder.getVoidTy(), {ThreadIDTy, TaskPtrTy}, /* isVarArg */ false); auto ProxyFn = Function::Create(ProxyFnTy, GlobalValue::InternalLinkage, ".omp_target_task_proxy_func", Builder.GetInsertBlock()->getModule()); - ProxyFn->getArg(0)->setName("thread.id"); - ProxyFn->getArg(1)->setName("task"); + Value *ThreadId = ProxyFn->getArg(0); + Value *TaskWithPrivates = ProxyFn->getArg(1); + ThreadId->setName("thread.id"); + TaskWithPrivates->setName("task"); + bool HasShareds = SharedArgsOperandNo > 0; + bool HasOffloadingArrays = NumOffloadingArrays > 0; BasicBlock *EntryBB = BasicBlock::Create(Builder.getContext(), "entry", ProxyFn); Builder.SetInsertPoint(EntryBB); - bool HasShareds = StaleCI->arg_size() > 1; - // TODO: This is a temporary assert to prove to ourselves that - // the outlined target launch function is always going to have - // atmost two arguments if there is any data shared between - // host and device. 
-  assert((!HasShareds || (StaleCI->arg_size() == 2)) &&
-         "StaleCI with shareds should have exactly two arguments.");
+  SmallVector<Value *> KernelLaunchArgs;
+  KernelLaunchArgs.reserve(StaleCI->arg_size());
+  KernelLaunchArgs.push_back(ThreadId);
+
+  if (HasOffloadingArrays) {
+    assert(TaskTy != TaskWithPrivatesTy &&
+           "If there are offloading arrays to pass to the target, "
+           "TaskTy cannot be the same as TaskWithPrivatesTy");
+    Value *Privates =
+        Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
+    for (unsigned int i = 0; i < NumOffloadingArrays; ++i)
+      KernelLaunchArgs.push_back(
+          Builder.CreateStructGEP(PrivatesTy, Privates, i));
+  }
 
-  Value *ThreadId = ProxyFn->getArg(0);
   if (HasShareds) {
-    auto *ArgStructAlloca = dyn_cast<AllocaInst>(StaleCI->getArgOperand(1));
+    auto *ArgStructAlloca =
+        dyn_cast<AllocaInst>(StaleCI->getArgOperand(SharedArgsOperandNo));
     assert(ArgStructAlloca &&
            "Unable to find the alloca instruction corresponding to arguments "
            "for extracted function");
@@ -7220,27 +7274,67 @@ static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder,
     AllocaInst *NewArgStructAlloca =
         Builder.CreateAlloca(ArgStructType, nullptr, "structArg");
-    Value *TaskT = ProxyFn->getArg(1);
+
     Value *SharedsSize =
         Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
-    Value *Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
-    LoadInst *LoadShared =
-        Builder.CreateLoad(PointerType::getUnqual(Ctx), Shareds);
+    LoadInst *LoadShared = loadSharedDataFromTaskDescriptor(
+        OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
     Builder.CreateMemCpy(
         NewArgStructAlloca, NewArgStructAlloca->getAlign(), LoadShared,
         LoadShared->getPointerAlignment(M.getDataLayout()), SharedsSize);
-
-    Builder.CreateCall(KernelLaunchFunction, {ThreadId, NewArgStructAlloca});
-  } else {
-    Builder.CreateCall(KernelLaunchFunction, {ThreadId});
+    KernelLaunchArgs.push_back(NewArgStructAlloca);
   }
-
+  Builder.CreateCall(KernelLaunchFunction, KernelLaunchArgs);
   Builder.CreateRetVoid();
   return ProxyFn;
 }
+static Type *getOffloadingArrayType(Value *V) {
+  if (auto *GEP = dyn_cast<GetElementPtrInst>(V))
+    return GEP->getSourceElementType();
+  if (auto *Alloca = dyn_cast<AllocaInst>(V))
+    return Alloca->getAllocatedType();
+
+  llvm_unreachable("Unhandled Instruction type");
+  return nullptr;
+}
+// This function returns a struct that has at most two members.
+// The first member is always %struct.kmp_task_ompbuilder_t, that is the task
+// descriptor. The second member, if needed, is a struct containing arrays
+// that need to be passed to the offloaded target kernel. For example,
+// if .offload_baseptrs, .offload_ptrs and .offload_sizes have to be passed to
+// the target kernel and their types are [3 x ptr], [3 x ptr] and [3 x i64]
+// respectively, then the types created by this function are
+//
+// %struct.privates = type { [3 x ptr], [3 x ptr], [3 x i64] }
+// %struct.task_with_privates = type { %struct.kmp_task_ompbuilder_t,
+//                                     %struct.privates }
+// %struct.task_with_privates is returned by this function.
+// If there aren't any offloading arrays to pass to the target kernel,
+// %struct.kmp_task_ompbuilder_t is returned.
+static StructType *
+createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder,
+                         ArrayRef<Value *> OffloadingArraysToPrivatize) {
+
+  if (OffloadingArraysToPrivatize.empty())
+    return OMPIRBuilder.Task;
+
+  SmallVector<Type *> StructFieldTypes;
+  for (Value *V : OffloadingArraysToPrivatize) {
+    assert(V->getType()->isPointerTy() &&
+           "Expected pointer to array to privatize. Got a non-pointer value "
+           "instead");
+    Type *ArrayTy = getOffloadingArrayType(V);
+    assert(ArrayTy && "ArrayType cannot be nullptr");
+    StructFieldTypes.push_back(ArrayTy);
+  }
+  StructType *PrivatesStructTy =
+      StructType::create(StructFieldTypes, "struct.privates");
+  return StructType::create({OMPIRBuilder.Task, PrivatesStructTy},
+                            "struct.task_with_privates");
+}
 
 static Error emitTargetOutlinedFunction(
     OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry,
     TargetRegionEntryInfo &EntryInfo,
@@ -7266,7 +7360,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
     TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc,
     OpenMPIRBuilder::InsertPointTy AllocaIP,
     const SmallVector<DependData> &Dependencies,
-    bool HasNoWait) {
+    const TargetDataRTArgs &RTArgs, bool HasNoWait) {
 
   // The following explains the code-gen scenario for the `target` directive. A
   // similar scneario is followed for other device-related directives (e.g.
@@ -7276,27 +7370,30 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
   // When we arrive at this function, the target region itself has been
   // outlined into the function OutlinedFn.
   // So at ths point, for
-  // --------------------------------------------------
+  // --------------------------------------------------------------
   // void user_code_that_offloads(...) {
-  //   omp target depend(..) map(from:a) map(to:b, c)
-  //      a = b + c
+  //   omp target depend(..) map(from:a) map(to:b) private(i)
+  //       do i = 1, 10
+  //        a(i) = b(i) + n
   // }
   //
-  // --------------------------------------------------
+  // --------------------------------------------------------------
   //
   // we have
   //
-  // --------------------------------------------------
+  // --------------------------------------------------------------
   //
   // void user_code_that_offloads(...) {
-  //   %.offload_baseptrs = alloca [3 x ptr], align 8
-  //   %.offload_ptrs = alloca [3 x ptr], align 8
-  //   %.offload_mappers = alloca [3 x ptr], align 8
+  //   %.offload_baseptrs = alloca [2 x ptr], align 8
+  //   %.offload_ptrs = alloca [2 x ptr], align 8
+  //   %.offload_mappers = alloca [2 x ptr], align 8
   //   ;; target region has been outlined and now we need to
   //   ;; offload to it via a target task.
   // }
-  // void outlined_device_function(ptr a, ptr b, ptr c) {
-  //   *a = *b + *c
+  // void outlined_device_function(ptr a, ptr b, ptr n) {
+  //   n = *n_ptr;
+  //   do i = 1, 10
+  //     a(i) = b(i) + n
   // }
   //
   // We have to now do the following
@@ -7309,33 +7406,59 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
   // (iii) Create a task with the task entry point created in (ii)
   //
   // That is we create the following
-  //
+  //   struct task_with_privates {
+  //     struct kmp_task_ompbuilder_t task_struct;
+  //     struct privates {
+  //       [2 x ptr] ; baseptrs
+  //       [2 x ptr] ; ptrs
+  //       [2 x i64] ; sizes
+  //     }
+  //   }
   // void user_code_that_offloads(...) {
-  //   %.offload_baseptrs = alloca [3 x ptr], align 8
-  //   %.offload_ptrs = alloca [3 x ptr], align 8
-  //   %.offload_mappers = alloca [3 x ptr], align 8
+  //   %.offload_baseptrs = alloca [2 x ptr], align 8
+  //   %.offload_ptrs = alloca [2 x ptr], align 8
+  //   %.offload_sizes = alloca [2 x i64], align 8
   //
   //   %structArg = alloca { ptr, ptr, ptr }, align 8
-  //   %strucArg[0] = %.offload_baseptrs
-  //   %strucArg[1] = %.offload_ptrs
-  //   %strucArg[2] = %.offload_mappers
-  //   proxy_target_task = @__kmpc_omp_task_alloc(...,
-  //                                               @.omp_target_task_proxy_func)
-  //   memcpy(proxy_target_task->shareds, %structArg, sizeof(structArg))
+  //   %structArg[0] = a
+  //   %structArg[1] = b
+  //   %structArg[2] = &n
+  //
+  //   target_task_with_privates = @__kmpc_omp_target_task_alloc(...,
+  //                                             sizeof(kmp_task_ompbuilder_t),
+  //                                             sizeof(structArg),
+  //                                             @.omp_target_task_proxy_func,
+  //                                             ...)
+  //   memcpy(target_task_with_privates->task_struct->shareds, %structArg,
+  //          sizeof(structArg))
+  //   memcpy(target_task_with_privates->privates->baseptrs,
+  //          offload_baseptrs, sizeof(offload_baseptrs))
+  //   memcpy(target_task_with_privates->privates->ptrs,
+  //          offload_ptrs, sizeof(offload_ptrs))
+  //   memcpy(target_task_with_privates->privates->sizes,
+  //          offload_sizes, sizeof(offload_sizes))
   //   dependencies_array = ...
   //   ;; if nowait not present
   //   call @__kmpc_omp_wait_deps(..., dependencies_array)
   //   call @__kmpc_omp_task_begin_if0(...)
   //   call @ @.omp_target_task_proxy_func(i32 thread_id, ptr
-  //   %proxy_target_task) call @__kmpc_omp_task_complete_if0(...)
+  //   %target_task_with_privates)
+  //   call @__kmpc_omp_task_complete_if0(...)
   // }
   //
   // define internal void @.omp_target_task_proxy_func(i32 %thread.id,
   //                                                   ptr %task) {
   //     %structArg = alloca {ptr, ptr, ptr}
-  //     %shared_data = load (getelementptr %task, 0, 0)
-  //     mempcy(%structArg, %shared_data, sizeof(structArg))
-  //     kernel_launch_function(%thread.id, %structArg)
+  //     %task_ptr = getelementptr(%task, 0, 0)
+  //     %shared_data = load (getelementptr %task_ptr, 0, 0)
+  //     memcpy(%structArg, %shared_data, sizeof(%structArg))
+  //
+  //     %offloading_arrays = getelementptr(%task, 0, 1)
+  //     %offload_baseptrs = getelementptr(%offloading_arrays, 0, 0)
+  //     %offload_ptrs = getelementptr(%offloading_arrays, 0, 1)
+  //     %offload_sizes = getelementptr(%offloading_arrays, 0, 2)
+  //     kernel_launch_function(%thread.id, %offload_baseptrs, %offload_ptrs,
+  //                            %offload_sizes, %structArg)
   // }
   //
   // We need the proxy function because the signature of the task entry point
@@ -7343,21 +7466,21 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
   // that of the kernel_launch function.
   //
   // kernel_launch_function is generated by emitKernelLaunch and has the
-  // always_inline attribute.
-  // void kernel_launch_function(thread_id,
-  //                             structArg) alwaysinline {
+  // always_inline attribute.
For this example, it'll look like so: + // void kernel_launch_function(%thread_id, %offload_baseptrs, %offload_ptrs, + // %offload_sizes, %structArg) alwaysinline { // %kernel_args = alloca %struct.__tgt_kernel_arguments, align 8 - // offload_baseptrs = load(getelementptr structArg, 0, 0) - // offload_ptrs = load(getelementptr structArg, 0, 1) - // offload_mappers = load(getelementptr structArg, 0, 2) + // ; load aggregated data from %structArg // ; setup kernel_args using offload_baseptrs, offload_ptrs and - // ; offload_mappers + // ; offload_sizes // call i32 @__tgt_target_kernel(..., // outlined_device_function, // ptr %kernel_args) // } - // void outlined_device_function(ptr a, ptr b, ptr c) { - // *a = *b + *c + // void outlined_device_function(ptr a, ptr b, ptr n) { + // n = *n_ptr; + // do i = 1, 10 + // a(i) = b(i) + n // } // BasicBlock *TargetTaskBodyBB = @@ -7378,6 +7501,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask( OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal( Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP, "global.tid", false)); + // Generate the task body which will subsequently be outlined. Builder.restoreIP(TargetTaskBodyIP); if (Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP)) return Err; @@ -7396,15 +7520,57 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask( emitBlock(OI.ExitBB, Builder.GetInsertBlock()->getParent(), /*IsFinished=*/true); - OI.PostOutlineCB = [this, ToBeDeleted, Dependencies, HasNoWait, - DeviceID](Function &OutlinedFn) mutable { + SmallVector OffloadingArraysToPrivatize; + bool NeedsTargetTask = HasNoWait && DeviceID; + if (NeedsTargetTask) { + for (auto *V : + {RTArgs.BasePointersArray, RTArgs.PointersArray, RTArgs.MappersArray, + RTArgs.MapNamesArray, RTArgs.MapTypesArray, RTArgs.MapTypesArrayEnd, + RTArgs.SizesArray}) { + if (V && !isa(V)) { + OffloadingArraysToPrivatize.push_back(V); + OI.ExcludeArgsFromAggregate.push_back(V); + } + } + } + OI.PostOutlineCB = [this, ToBeDeleted, Dependencies, NeedsTargetTask, + DeviceID, OffloadingArraysToPrivatize]( + Function &OutlinedFn) mutable { assert(OutlinedFn.hasOneUse() && "there must be a single user for the outlined function"); CallInst *StaleCI = cast(OutlinedFn.user_back()); - bool HasShareds = StaleCI->arg_size() > 1; - Function *ProxyFn = emitTargetTaskProxyFunction(*this, Builder, StaleCI); + // The first argument of StaleCI is always the thread id. + // The next few arguments are the pointers to offloading arrays + // if any. (see OffloadingArraysToPrivatize) + // Finally, all other local values that are live-in into the outlined region + // end up in a structure whose pointer is passed as the last argument. This + // piece of data is passed in the "shared" field of the task structure. So, + // we know we have to pass shareds to the task if the number of arguments is + // greater than OffloadingArraysToPrivatize.size() + 1 The 1 is for the + // thread id. Further, for safety, we assert that the number of arguments of + // StaleCI is exactly OffloadingArraysToPrivatize.size() + 2 + const unsigned int NumStaleCIArgs = StaleCI->arg_size(); + bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.size() + 1; + assert( + !HasShareds || + NumStaleCIArgs == (OffloadingArraysToPrivatize.size() + 2) && + "Wrong number of arguments for StaleCI when shareds are present"); + int SharedArgOperandNo = + HasShareds ? 
OffloadingArraysToPrivatize.size() + 1 : 0; + + StructType *TaskWithPrivatesTy = + createTaskWithPrivatesTy(*this, OffloadingArraysToPrivatize); + StructType *PrivatesTy = nullptr; + + if (!OffloadingArraysToPrivatize.empty()) + PrivatesTy = + static_cast(TaskWithPrivatesTy->getElementType(1)); + + Function *ProxyFn = emitTargetTaskProxyFunction( + *this, Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy, + OffloadingArraysToPrivatize.size(), SharedArgOperandNo); LLVM_DEBUG(dbgs() << "Proxy task entry function created: " << *ProxyFn << "\n"); @@ -7422,7 +7588,6 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask( // If `HasNoWait == true`, we call @__kmpc_omp_target_task_alloc to provide // the DeviceID to the deferred task and also since // @__kmpc_omp_target_task_alloc creates an untied/async task. - bool NeedsTargetTask = HasNoWait && DeviceID; Function *TaskAllocFn = !NeedsTargetTask ? getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) @@ -7435,17 +7600,19 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask( // Argument - `sizeof_kmp_task_t` (TaskSize) // Tasksize refers to the size in bytes of kmp_task_t data structure - // including private vars accessed in task. - // TODO: add kmp_task_t_with_privates (privates) - Value *TaskSize = - Builder.getInt64(M.getDataLayout().getTypeStoreSize(Task)); + // plus any other data to be passed to the target task, if any, which + // is packed into a struct. kmp_task_t and the struct so created are + // packed into a wrapper struct whose type is TaskWithPrivatesTy. + Value *TaskSize = Builder.getInt64( + M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy)); // Argument - `sizeof_shareds` (SharedsSize) // SharedsSize refers to the shareds array size in the kmp_task_t data // structure. 
Value *SharedsSize = Builder.getInt64(0); if (HasShareds) { - auto *ArgStructAlloca = dyn_cast(StaleCI->getArgOperand(1)); + auto *ArgStructAlloca = + dyn_cast(StaleCI->getArgOperand(SharedArgOperandNo)); assert(ArgStructAlloca && "Unable to find the alloca instruction corresponding to arguments " "for extracted function"); @@ -7483,13 +7650,32 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask( TaskData = Builder.CreateCall(TaskAllocFn, TaskAllocArgs); + Align Alignment = TaskData->getPointerAlignment(M.getDataLayout()); if (HasShareds) { - Value *Shareds = StaleCI->getArgOperand(1); - Align Alignment = TaskData->getPointerAlignment(M.getDataLayout()); - Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData); + Value *Shareds = StaleCI->getArgOperand(SharedArgOperandNo); + Value *TaskShareds = loadSharedDataFromTaskDescriptor( + *this, Builder, TaskData, TaskWithPrivatesTy); Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment, SharedsSize); } + if (!OffloadingArraysToPrivatize.empty()) { + Value *Privates = + Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1); + for (unsigned int i = 0; i < OffloadingArraysToPrivatize.size(); ++i) { + Value *PtrToPrivatize = OffloadingArraysToPrivatize[i]; + Type *ArrayType = getOffloadingArrayType(PtrToPrivatize); + assert(ArrayType && "ArrayType cannot be nullptr"); + + Type *ElementType = PrivatesTy->getElementType(i); + assert(ElementType == ArrayType && + "ElementType should match ArrayType"); + + Value *Dst = Builder.CreateStructGEP(PrivatesTy, Privates, i); + Builder.CreateMemCpy( + Dst, Alignment, PtrToPrivatize, Alignment, + Builder.getInt64(M.getDataLayout().getTypeStoreSize(ElementType))); + } + } Value *DepArray = emitTaskDependencies(*this, Dependencies); @@ -7635,9 +7821,10 @@ static void emitTargetCall( // Arguments that are intended to be directly forwarded to an // emitKernelLaunch call are pased as nullptr, since // OutlinedFnID=nullptr results in that call not being done. + OpenMPIRBuilder::TargetDataRTArgs EmptyRTArgs; return OMPBuilder.emitTargetTask(TaskBodyCB, /*DeviceID=*/nullptr, /*RTLoc=*/nullptr, AllocaIP, - Dependencies, HasNoWait); + Dependencies, EmptyRTArgs, HasNoWait); } return EmitTargetCallFallbackCB(Builder.saveIP()); }()); @@ -7649,6 +7836,7 @@ static void emitTargetCall( auto &&EmitTargetCallThen = [&](OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP) -> Error { + Info.HasNoWait = HasNoWait; OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP()); OpenMPIRBuilder::TargetDataRTArgs RTArgs; if (Error Err = OMPBuilder.emitOffloadingArraysAndArgs( @@ -7726,7 +7914,8 @@ static void emitTargetCall( // explicit generation of the target task. 
if (RequiresOuterTargetTask) return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP, - Dependencies, HasNoWait); + Dependencies, KArgs.RTArgs, + Info.HasNoWait); return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs, diff --git a/mlir/test/Target/LLVMIR/omptarget-depend.mlir b/mlir/test/Target/LLVMIR/omptarget-depend.mlir index f2948c6510138..0f2437639319a 100644 --- a/mlir/test/Target/LLVMIR/omptarget-depend.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-depend.mlir @@ -126,7 +126,8 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a // CHECK-DAG: %[[DEP_ARRAY:.+]] = alloca [1 x %struct.kmp_dep_info], align 8 // CHECK: %[[TASKDATA:.+]] = call ptr @__kmpc_omp_task_alloc({{.+}}, ptr @.omp_target_task_proxy_func) -// CHECK: %[[SHARED_DATA:.+]] = load ptr, ptr %[[TASKDATA]], align 8 +// CHECK: %[[SHARED_PTR:.+]] = getelementptr inbounds nuw %struct.kmp_task_ompbuilder_t, ptr %[[TASKDATA]], i32 0, i32 0 +// CHECK: %[[SHARED_DATA:.+]] = load ptr, ptr %[[SHARED_PTR]], align 8 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[SHARED_DATA]], ptr align 1 %[[STRUCTARG]], i64 24, i1 false) // CHECK: %[[DEP_INFO:.+]] = getelementptr inbounds [1 x %struct.kmp_dep_info], ptr %[[DEP_ARRAY]], i64 0, i64 0 diff --git a/mlir/test/Target/LLVMIR/omptarget-nowait-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-nowait-llvm.mlir index b487b31d54477..5eee7b7d7d976 100644 --- a/mlir/test/Target/LLVMIR/omptarget-nowait-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-nowait-llvm.mlir @@ -13,19 +13,48 @@ module attributes {omp.target_triples = ["dummy-target-triple"]} { } llvm.return } +} +// CHECK: %struct.[[TSK_WTH_PRVTS:.*]] = type { %struct.kmp_task_ompbuilder_t, %struct.[[PRVTS:.*]] } +// CHECK: %struct.kmp_task_ompbuilder_t = type { ptr, ptr, i32, ptr, ptr } +// CHECK: %struct.[[PRVTS]] = type { [1 x ptr], [1 x ptr] } // CHECK: define void @_QPfoo() { +// CHECK: %[[STRUCTARG:.*]] = alloca { ptr }, align 8 +// CHECK: %[[BASEPTRS:.*]] = alloca [1 x ptr], align 8 +// CHECK: %[[PTRS:.*]] = alloca [1 x ptr], align 8 +// CHECK: %[[MAPPERS:.*]] = alloca [1 x ptr], align 8 + +// CHECK: getelementptr inbounds [1 x ptr], ptr %[[BASEPTRS]], i32 0, i32 0 +// CHECK: getelementptr inbounds [1 x ptr], ptr %[[PTRS]], i32 0, i32 0 +// CHECK: %[[BASEPTRS_GEP:.*]] = getelementptr inbounds [1 x ptr], ptr %[[BASEPTRS]], i32 0, i32 0 +// CHECK: %[[PTRS_GEP:.*]] = getelementptr inbounds [1 x ptr], ptr %[[PTRS]], i32 0, i32 0 -// CHECK: %[[TASK:.*]] = call ptr @__kmpc_omp_target_task_alloc -// CHECK-SAME: (ptr @{{.*}}, i32 %{{.*}}, i32 {{.*}}, i64 {{.*}}, i64 {{.*}}, ptr -// CHECK-SAME: @[[TASK_PROXY_FUNC:.*]], i64 {{.*}}) -// CHECK: call i32 @__kmpc_omp_task(ptr {{.*}}, i32 %{{.*}}, ptr %[[TASK]]) +// CHECK: %[[TASK:.*]] = call ptr @__kmpc_omp_target_task_alloc +// CHECK-SAME: (ptr @{{.*}}, i32 %{{.*}}, i32 {{.*}}, i64 {{.*}}, i64 {{.*}}, ptr +// CHECK-SAME: @[[TASK_PROXY_FUNC:.*]], i64 {{.*}}) +// CHECK: %[[TSK_PTR:.*]] = getelementptr inbounds nuw %struct.[[TSK_WTH_PRVTS]], ptr %[[TASK]], i32 0, i32 0 +// CHECK: %[[SHAREDS:.*]] = getelementptr inbounds nuw %struct.kmp_task_ompbuilder_t, ptr %[[TSK_PTR]], i32 0, i32 0 +// CHECK: %[[SHAREDS_PTR:.*]] = load ptr, ptr %[[SHAREDS]], align 8 +// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[SHAREDS_PTR]], ptr align 1 %[[STRUCTARG]], i64 8, i1 false) +// CHECK: %[[VAL_50:.*]] = getelementptr inbounds nuw %struct.[[TSK_WTH_PRVTS]], ptr %[[TASK]], i32 0, i32 1 +// CHECK: %[[VAL_51:.*]] = 
getelementptr inbounds nuw %struct.[[PRVTS]], ptr %[[VAL_50]], i32 0, i32 0 +// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_51]], ptr align 1 %[[BASEPTRS_GEP]], i64 8, i1 false) +// CHECK: %[[VAL_53:.*]] = getelementptr inbounds nuw %struct.[[PRVTS]], ptr %[[VAL_50]], i32 0, i32 1 +// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_53]], ptr align 1 %[[PTRS_GEP]], i64 8, i1 false) +// CHECK: call i32 @__kmpc_omp_task(ptr {{.*}}, i32 %{{.*}}, ptr %[[TASK]]) // CHECK: } +// CHECK: define internal void @[[WORKER:.*]](i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}) { -// CHECK: define internal void @[[TASK_PROXY_FUNC]](i32 %{{.*}}, ptr %{{.*}}) { -// CHECK: call void @_QPfoo..omp_par(i32 %{{.*}}, ptr %{{.*}}) -// CHECK: } -} +// CHECK: define internal void @[[TASK_PROXY_FUNC]](i32 %[[THREAD_ID_PARAM:.*]], ptr %[[TASK_DESC_PARAM:.*]]) { +// CHECK: %[[PRIVATE_DATA:.*]] = getelementptr inbounds nuw %struct.[[TSK_WTH_PRVTS]], ptr %[[TASK_DESC_PARAM]], i32 0, i32 1 +// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds nuw %struct.[[PRVTS]], ptr %[[PRIVATE_DATA]], i32 0, i32 0 +// CHECK: %[[PTRS:.*]] = getelementptr inbounds nuw %struct.[[PRVTS]], ptr %[[PRIVATE_DATA]], i32 0, i32 1 +// CHECK: %[[STRUCTARG:.*]] = alloca { ptr }, align 8 +// CHECK: %[[TASK:.*]] = getelementptr inbounds nuw %struct.[[TSK_WTH_PRVTS]], ptr %[[TASK_DESC_PARAM]], i32 0, i32 0 +// CHECK: %[[SHAREDS:.*]] = getelementptr inbounds nuw %struct.kmp_task_ompbuilder_t, ptr %[[TASK]], i32 0, i32 0 +// CHECK: %[[SHAREDS_PTR:.*]] = load ptr, ptr %[[SHAREDS]], align 8 +// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %[[STRUCTARG]], ptr align 1 %[[SHAREDS_PTR]], i64 8, i1 false) +// CHECK: call void @[[WORKER]](i32 %{{.*}}, ptr %{{.*}}) diff --git a/mlir/test/Target/LLVMIR/omptarget-nowait.mlir b/mlir/test/Target/LLVMIR/omptarget-nowait.mlir new file mode 100644 index 0000000000000..19333c44322f1 --- /dev/null +++ b/mlir/test/Target/LLVMIR/omptarget-nowait.mlir @@ -0,0 +1,70 @@ +// RUN: mlir-translate -mlir-to-llvmir %s 2>&1 | FileCheck %s + +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @launch_(%arg0: !llvm.ptr {fir.bindc_name = "a", llvm.nocapture}) { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x f64 {bindc_name = "n"} : (i64) -> !llvm.ptr + %2 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> + %3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, f64) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%2 : !llvm.ptr) -> !llvm.ptr {name = ""} + %4 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(to) capture(ByRef) members(%3 : [0] : !llvm.ptr) -> !llvm.ptr {name = "a"} + %5 = omp.map.info var_ptr(%1 : !llvm.ptr, f64) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "n"} + omp.target nowait map_entries(%4 -> %arg1, %5 -> %arg2, %3 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + %two_f = llvm.mlir.constant(2.000000e+00 : f64) : f64 + %one_i = llvm.mlir.constant(1 : index) : i64 + %6 = llvm.getelementptr %arg1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> + %7 = llvm.load %6 : !llvm.ptr -> !llvm.ptr + %8 = llvm.getelementptr %7[%one_i] : (!llvm.ptr, i64) -> !llvm.ptr, i8 + %9 = llvm.load %8 : !llvm.ptr -> f64 + %10 = llvm.fmul %9, %two_f {fastmathFlags = #llvm.fastmath} : f64 + llvm.store %10, %8 : f64, 
!llvm.ptr + omp.terminator + } + llvm.return + } +} + +// CHECK: %struct.[[TSK_WTH_PRVTS:.*]] = type { %struct.kmp_task_ompbuilder_t, %struct.[[PRVTS:.*]] } +// CHECK: %struct.kmp_task_ompbuilder_t = type { ptr, ptr, i32, ptr, ptr } +// CHECK: %struct.[[PRVTS]] = type { [5 x ptr], [5 x ptr], [5 x i64] } + +// CHECK: define void @launch_(ptr captures(none) %0) +// CHECK: %[[STRUCTARG:.*]] = alloca { ptr, ptr }, align 8 +// CHECK: %[[BASEPTRS:.*]] = alloca [5 x ptr], align 8 +// CHECK: %[[PTRS:.*]] = alloca [5 x ptr], align 8 +// CHECK: %[[MAPPERS:.*]] = alloca [5 x ptr], align 8 +// CHECK: %[[SIZES:.*]] = alloca [5 x i64], align 4 + + +// CHECK: %[[VAL_20:.*]] = getelementptr inbounds [5 x ptr], ptr %[[BASEPTRS]], i32 0, i32 0 +// CHECK: %[[BASEPTRS_GEP:.*]] = getelementptr inbounds [5 x ptr], ptr %[[BASEPTRS]], i32 0, i32 0 +// CHECK: %[[PTRS_GEP:.*]] = getelementptr inbounds [5 x ptr], ptr %[[PTRS]], i32 0, i32 0 +// CHECK: %[[SIZES_GEP:.*]] = getelementptr inbounds [5 x i64], ptr %[[SIZES]], i32 0, i32 0 + +// CHECK: %[[GL_THRD_NUM:.*]] = call i32 @__kmpc_global_thread_num +// CHECK: %[[TASK_DESC:.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @4, i32 {{.*}}, i32 0, i64 160, i64 16, ptr [[TGT_TSK_PRXY_FNC:.*]], i64 -1) +// CHECK: %[[TSK_PTR:.*]] = getelementptr inbounds nuw %struct.[[TSK_WTH_PRVTS]], ptr %[[TASK_DESC]], i32 0, i32 0 +// CHECK: %[[SHAREDS:.*]] = getelementptr inbounds nuw %struct.kmp_task_ompbuilder_t, ptr %[[TSK_PTR]], i32 0, i32 0 +// CHECK: %[[SHAREDS_PTR:.*]] = load ptr, ptr %[[SHAREDS]], align 8 +// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[SHAREDS_PTR]], ptr align 1 %[[STRUCTARG]], i64 16, i1 false) +// CHECK: %[[VAL_50:.*]] = getelementptr inbounds nuw %struct.[[TSK_WTH_PRVTS]], ptr %[[TASK_DESC]], i32 0, i32 1 +// CHECK: %[[VAL_51:.*]] = getelementptr inbounds nuw %struct.[[PRVTS]], ptr %[[VAL_50]], i32 0, i32 0 +// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_51]], ptr align 1 %[[BASEPTRS_GEP]], i64 40, i1 false) +// CHECK: %[[VAL_53:.*]] = getelementptr inbounds nuw %struct.[[PRVTS]], ptr %[[VAL_50]], i32 0, i32 1 +// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_53]], ptr align 1 %[[PTRS_GEP]], i64 40, i1 false) +// CHECK: %[[VAL_54:.*]] = getelementptr inbounds nuw %struct.[[PRVTS]], ptr %[[VAL_50]], i32 0, i32 2 +// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_54]], ptr align 1 %[[SIZES_GEP]], i64 40, i1 false) +// CHECK: %[[VAL_55:.*]] = call i32 @__kmpc_omp_task(ptr @4, i32 %[[GL_THRD_NUM]], ptr %[[TASK_DESC]]) + +// CHECK: define internal void @[[WORKER:.*]](i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}) { + +// CHECK: define internal void [[TGT_TSK_PRXY_FNC]](i32 %[[THREAD_ID_PARAM:.*]], ptr %[[TASK_DESC_PARAM:.*]]) { +// CHECK: %[[PRIVATE_DATA:.*]] = getelementptr inbounds nuw %struct.[[TSK_WTH_PRVTS]], ptr %[[TASK_DESC_PARAM]], i32 0, i32 1 +// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds nuw %struct.[[PRVTS]], ptr %[[PRIVATE_DATA]], i32 0, i32 0 +// CHECK: %[[PTRS:.*]] = getelementptr inbounds nuw %struct.[[PRVTS]], ptr %[[PRIVATE_DATA]], i32 0, i32 1 +// CHECK: %[[SIZES:.*]] = getelementptr inbounds nuw %struct.[[PRVTS]], ptr %[[PRIVATE_DATA]], i32 0, i32 2 +// CHECK: %[[STRUCTARG:.*]] = alloca { ptr, ptr }, align 8 +// CHECK: %[[TASK:.*]] = getelementptr inbounds nuw %struct.[[TSK_WTH_PRVTS]], ptr %[[TASK_DESC_PARAM]], i32 0, i32 0 +// CHECK: %[[SHAREDS:.*]] = getelementptr inbounds nuw %struct.kmp_task_ompbuilder_t, ptr %[[TASK]], i32 0, i32 0 +// CHECK: %[[SHAREDS_PTR:.*]] = 
load ptr, ptr %[[SHAREDS]], align 8 +// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %[[STRUCTARG]], ptr align 1 %[[SHAREDS_PTR]], i64 16, i1 false) +// CHECK: call void @[[WORKER]](i32 %[[THREAD_ID_PARAM]], ptr %[[BASEPTRS]], ptr %[[PTRS]], ptr %[[SIZES]], ptr %[[STRUCTARG]]) diff --git a/mlir/test/Target/LLVMIR/omptargetdata-nowait-llvm.mlir b/mlir/test/Target/LLVMIR/omptargetdata-nowait-llvm.mlir index 8124d02ef2174..dba8c553aaca5 100644 --- a/mlir/test/Target/LLVMIR/omptargetdata-nowait-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptargetdata-nowait-llvm.mlir @@ -14,25 +14,20 @@ llvm.func @_QPopenmp_target_data_enter() { // CHECK: %[[TASK:.*]] = call ptr @__kmpc_omp_target_task_alloc // CHECK-SAME: (ptr @{{.*}}, i32 %{{.*}}, i32 {{.*}}, i64 {{.*}}, i64 {{.*}}, ptr -// CHECK-SAME: @[[TASK_PROXY_FUNC:.*]], i64 {{.*}}) +// CHECK-SAME: @[[TASK_PROXY_FUNC_ENTER:.*]], i64 {{.*}}) // CHECK: call i32 @__kmpc_omp_task(ptr {{.*}}, i32 %{{.*}}, ptr %[[TASK]]) // CHECK: } -// CHECK: define internal void @[[TASK_BODY_FUNC:.*]](i32 %[[TID:.*]], ptr %[[TASK_ARG:.*]]) { -// CHECK: %[[OFFLOAD_BASE_PTRS:.*]] = getelementptr { ptr, ptr }, ptr %[[TASK_ARG]], i32 0, i32 0 -// CHECK: %[[OFFLOAD_BASE_PTRS_VAL:.*]] = load ptr, ptr %[[OFFLOAD_BASE_PTRS]], align 8 -// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr { ptr, ptr }, ptr %[[TASK_ARG]], i32 0, i32 1 -// CHECK: %[[OFFLOAD_PTRS_VAL:.*]] = load ptr, ptr %[[OFFLOAD_PTRS]], align 8 - +// CHECK: define internal void @[[TASK_BODY_FUNC_ENTER:.*]](i32 {{.*}}, ptr %[[OFFLOAD_BASE_PTRS:.*]], ptr %[[OFFLOAD_PTRS:.*]]) { // CHECK: call void @__tgt_target_data_begin_nowait_mapper( // CHECK-SAME: ptr @{{.*}}, i64 -1, i32 1, -// CHECK-SAME: ptr %[[OFFLOAD_BASE_PTRS_VAL]], ptr %[[OFFLOAD_PTRS_VAL]], +// CHECK-SAME: ptr %[[OFFLOAD_BASE_PTRS]], ptr %[[OFFLOAD_PTRS]], // CHECK-SAME: ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr null, i32 0, ptr null, i32 0, ptr null) // CHECK: } -// CHECK: define internal void @[[TASK_PROXY_FUNC]](i32 %{{.*}}, ptr %{{.*}}) { -// CHECK: call void @[[TASK_BODY_FUNC]](i32 %{{.*}}, ptr %{{.*}}) +// CHECK: define internal void @[[TASK_PROXY_FUNC_ENTER]](i32 %{{.*}}, ptr %{{.*}}) { +// CHECK: call void @[[TASK_BODY_FUNC_ENTER]](i32 %{{.*}}, ptr %{{.*}}, ptr %{{.*}}) // CHECK: } // ----- @@ -51,25 +46,20 @@ llvm.func @_QPopenmp_target_data_update() { // CHECK: %[[TASK:.*]] = call ptr @__kmpc_omp_target_task_alloc // CHECK-SAME: (ptr @{{.*}}, i32 %{{.*}}, i32 {{.*}}, i64 {{.*}}, i64 {{.*}}, ptr -// CHECK-SAME: @[[TASK_PROXY_FUNC:.*]], i64 {{.*}}) +// CHECK-SAME: @[[TASK_PROXY_FUNC_UPDATE:.*]], i64 {{.*}}) // CHECK: call i32 @__kmpc_omp_task(ptr {{.*}}, i32 %{{.*}}, ptr %[[TASK]]) // CHECK: } -// CHECK: define internal void @[[TASK_BODY_FUNC:.*]](i32 %[[TID:.*]], ptr %[[TASK_ARG:.*]]) { -// CHECK: %[[OFFLOAD_BASE_PTRS:.*]] = getelementptr { ptr, ptr }, ptr %[[TASK_ARG]], i32 0, i32 0 -// CHECK: %[[OFFLOAD_BASE_PTRS_VAL:.*]] = load ptr, ptr %[[OFFLOAD_BASE_PTRS]], align 8 -// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr { ptr, ptr }, ptr %[[TASK_ARG]], i32 0, i32 1 -// CHECK: %[[OFFLOAD_PTRS_VAL:.*]] = load ptr, ptr %[[OFFLOAD_PTRS]], align 8 - +// CHECK: define internal void @[[TASK_BODY_FUNC_UPDATE:.*]](i32 {{.*}}, ptr %[[OFFLOAD_BASE_PTRS:.*]], ptr %[[OFFLOAD_PTRS:.*]]) { // CHECK: call void @__tgt_target_data_update_nowait_mapper( // CHECK-SAME: ptr @{{.*}}, i64 -1, i32 1, -// CHECK-SAME: ptr %[[OFFLOAD_BASE_PTRS_VAL]], ptr %[[OFFLOAD_PTRS_VAL]], +// CHECK-SAME: ptr %[[OFFLOAD_BASE_PTRS]], ptr %[[OFFLOAD_PTRS]], // CHECK-SAME: ptr @{{.*}}, ptr 
@{{.*}}, ptr @{{.*}}, ptr null, i32 0, ptr null, i32 0, ptr null) // CHECK: } -// CHECK: define internal void @[[TASK_PROXY_FUNC]](i32 %{{.*}}, ptr %{{.*}}) { -// CHECK: call void @[[TASK_BODY_FUNC]](i32 %{{.*}}, ptr %{{.*}}) +// CHECK: define internal void @[[TASK_PROXY_FUNC_UPDATE]](i32 %{{.*}}, ptr %{{.*}}) { +// CHECK: call void @[[TASK_BODY_FUNC_UPDATE]](i32 %{{.*}}, ptr %{{.*}}) // CHECK: } // ----- @@ -88,23 +78,18 @@ llvm.func @_QPopenmp_target_data_exit() { // CHECK: %[[TASK:.*]] = call ptr @__kmpc_omp_target_task_alloc // CHECK-SAME: (ptr @{{.*}}, i32 %{{.*}}, i32 {{.*}}, i64 {{.*}}, i64 {{.*}}, ptr -// CHECK-SAME: @[[TASK_PROXY_FUNC:.*]], i64 {{.*}}) +// CHECK-SAME: @[[TASK_PROXY_FUNC_EXIT:.*]], i64 {{.*}}) // CHECK: call i32 @__kmpc_omp_task(ptr {{.*}}, i32 %{{.*}}, ptr %[[TASK]]) // CHECK: } -// CHECK: define internal void @[[TASK_BODY_FUNC:.*]](i32 %[[TID:.*]], ptr %[[TASK_ARG:.*]]) { -// CHECK: %[[OFFLOAD_BASE_PTRS:.*]] = getelementptr { ptr, ptr }, ptr %[[TASK_ARG]], i32 0, i32 0 -// CHECK: %[[OFFLOAD_BASE_PTRS_VAL:.*]] = load ptr, ptr %[[OFFLOAD_BASE_PTRS]], align 8 -// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr { ptr, ptr }, ptr %[[TASK_ARG]], i32 0, i32 1 -// CHECK: %[[OFFLOAD_PTRS_VAL:.*]] = load ptr, ptr %[[OFFLOAD_PTRS]], align 8 - +// CHECK: define internal void @[[TASK_BODY_FUNC_EXIT:.*]](i32 %{{.*}}, ptr %[[OFFLOAD_BASE_PTRS:.*]], ptr %[[OFFLOAD_PTRS:.*]]) { // CHECK: call void @__tgt_target_data_end_nowait_mapper( // CHECK-SAME: ptr @{{.*}}, i64 -1, i32 1, -// CHECK-SAME: ptr %[[OFFLOAD_BASE_PTRS_VAL]], ptr %[[OFFLOAD_PTRS_VAL]], +// CHECK-SAME: ptr %[[OFFLOAD_BASE_PTRS]], ptr %[[OFFLOAD_PTRS]], // CHECK-SAME: ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr null, i32 0, ptr null, i32 0, ptr null) // CHECK: } -// CHECK: define internal void @[[TASK_PROXY_FUNC]](i32 %{{.*}}, ptr %{{.*}}) { -// CHECK: call void @[[TASK_BODY_FUNC]](i32 %{{.*}}, ptr %{{.*}}) +// CHECK: define internal void @[[TASK_PROXY_FUNC_EXIT]](i32 %{{.*}}, ptr %{{.*}}) { +// CHECK: call void @[[TASK_BODY_FUNC_EXIT]](i32 %{{.*}}, ptr %{{.*}}) // CHECK: }
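
The tests above exercise the new %struct.task_with_privates wrapper end to end. As a reading aid only (this sketch is not part of the patch; the three-array shape and the five-field kmp_task_ompbuilder_t layout are assumptions lifted from the CHECK lines above), the layout that createTaskWithPrivatesTy builds can be reproduced with the LLVM type APIs like so:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Builds the illustrative types:
//   %struct.kmp_task_ompbuilder_t = type { ptr, ptr, i32, ptr, ptr }
//   %struct.privates              = type { [3 x ptr], [3 x ptr], [3 x i64] }
//   %struct.task_with_privates    = type { %struct.kmp_task_ompbuilder_t,
//                                          %struct.privates }
static StructType *buildTaskWithPrivatesExample(LLVMContext &Ctx) {
  Type *Ptr = PointerType::getUnqual(Ctx);
  Type *I32 = Type::getInt32Ty(Ctx);
  Type *I64 = Type::getInt64Ty(Ctx);

  // Task descriptor: five fields, mirroring %struct.kmp_task_ompbuilder_t.
  StructType *TaskTy = StructType::create({Ptr, Ptr, I32, Ptr, Ptr},
                                          "struct.kmp_task_ompbuilder_t");

  // Privatized offloading arrays: baseptrs, ptrs and sizes for three maps.
  StructType *PrivatesTy = StructType::create(
      {ArrayType::get(Ptr, 3), ArrayType::get(Ptr, 3), ArrayType::get(I64, 3)},
      "struct.privates");

  // Wrapper passed to the task-alloc runtime call; the task descriptor is
  // always field 0, so the proxy function can still locate the shareds pointer.
  return StructType::create({TaskTy, PrivatesTy}, "struct.task_with_privates");
}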