diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index e4593e7db90e8..8721b900c8bee 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -13,6 +13,7 @@ #include "SPIRVBuiltins.h" #include "SPIRV.h" +#include "SPIRVSubtarget.h" #include "SPIRVUtils.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ValueTracking.h" @@ -82,6 +83,16 @@ struct GroupBuiltin { #define GET_GroupBuiltins_DECL #define GET_GroupBuiltins_IMPL +struct IntelSubgroupsBuiltin { + StringRef Name; + uint32_t Opcode; + bool IsBlock; + bool IsWrite; +}; + +#define GET_IntelSubgroupsBuiltins_DECL +#define GET_IntelSubgroupsBuiltins_IMPL + struct GetBuiltin { StringRef Name; InstructionSet::InstructionSet Set; @@ -549,6 +560,7 @@ static bool buildAtomicCompareExchangeInst(const SPIRV::IncomingCall *Call, assert(GR->getSPIRVTypeForVReg(ObjectPtr)->getOpcode() == SPIRV::OpTypePointer); unsigned ExpectedType = GR->getSPIRVTypeForVReg(ExpectedArg)->getOpcode(); + (void)ExpectedType; assert(IsCmpxchg ? ExpectedType == SPIRV::OpTypeInt : ExpectedType == SPIRV::OpTypePointer); assert(GR->isScalarOfType(Desired, SPIRV::OpTypeInt)); @@ -849,6 +861,7 @@ static bool generateGroupInst(const SPIRV::IncomingCall *Call, if (GroupBuiltin->HasBoolArg) { Register ConstRegister = Call->Arguments[0]; auto ArgInstruction = getDefInstrMaybeConstant(ConstRegister, MRI); + (void)ArgInstruction; // TODO: support non-constant bool values. assert(ArgInstruction->getOpcode() == TargetOpcode::G_CONSTANT && "Only constant bool value args are supported"); @@ -900,6 +913,67 @@ static bool generateGroupInst(const SPIRV::IncomingCall *Call, return true; } +static bool generateIntelSubgroupsInst(const SPIRV::IncomingCall *Call, + MachineIRBuilder &MIRBuilder, + SPIRVGlobalRegistry *GR) { + const SPIRV::DemangledBuiltin *Builtin = Call->Builtin; + MachineFunction &MF = MIRBuilder.getMF(); + const auto *ST = static_cast(&MF.getSubtarget()); + if (!ST->canUseExtension(SPIRV::Extension::SPV_INTEL_subgroups)) { + std::string DiagMsg = std::string(Builtin->Name) + + ": the builtin requires the following SPIR-V " + "extension: SPV_INTEL_subgroups"; + report_fatal_error(DiagMsg.c_str(), false); + } + const SPIRV::IntelSubgroupsBuiltin *IntelSubgroups = + SPIRV::lookupIntelSubgroupsBuiltin(Builtin->Name); + MachineRegisterInfo *MRI = MIRBuilder.getMRI(); + + uint32_t OpCode = IntelSubgroups->Opcode; + if (IntelSubgroups->IsBlock) { + // Minimal number or arguments set in TableGen records is 1 + if (SPIRVType *Arg0Type = GR->getSPIRVTypeForVReg(Call->Arguments[0])) { + if (Arg0Type->getOpcode() == SPIRV::OpTypeImage) { + // TODO: add required validation from the specification: + // "'Image' must be an object whose type is OpTypeImage with a 'Sampled' + // operand of 0 or 2. If the 'Sampled' operand is 2, then some + // dimensions require a capability." + switch (OpCode) { + case SPIRV::OpSubgroupBlockReadINTEL: + OpCode = SPIRV::OpSubgroupImageBlockReadINTEL; + break; + case SPIRV::OpSubgroupBlockWriteINTEL: + OpCode = SPIRV::OpSubgroupImageBlockWriteINTEL; + break; + } + } + } + } + + // TODO: opaque pointers types should be eventually resolved in such a way + // that validation of block read is enabled with respect to the following + // specification requirement: + // "'Result Type' may be a scalar or vector type, and its component type must + // be equal to the type pointed to by 'Ptr'." + // For example, function parameter type should not be default i8 pointer, but + // depend on the result type of the instruction where it is used as a pointer + // argument of OpSubgroupBlockReadINTEL + + // Build Intel subgroups instruction + MachineInstrBuilder MIB = + IntelSubgroups->IsWrite + ? MIRBuilder.buildInstr(OpCode) + : MIRBuilder.buildInstr(OpCode) + .addDef(Call->ReturnRegister) + .addUse(GR->getSPIRVTypeID(Call->ReturnType)); + for (size_t i = 0; i < Call->Arguments.size(); ++i) { + MIB.addUse(Call->Arguments[i]); + MRI->setRegClass(Call->Arguments[i], &SPIRV::IDRegClass); + } + + return true; +} + // These queries ask for a single size_t result for a given dimension index, e.g // size_t get_global_id(uint dimindex). In SPIR-V, the builtins corresonding to // these values are all vec3 types, so we need to extract the correct index or @@ -1199,6 +1273,7 @@ static bool generateImageMiscQueryInst(const SPIRV::IncomingCall *Call, MIRBuilder.getMRI()->setRegClass(Image, &SPIRV::IDRegClass); SPIRV::Dim::Dim ImageDimensionality = static_cast( GR->getSPIRVTypeForVReg(Image)->getOperand(2).getImm()); + (void)ImageDimensionality; switch (Opcode) { case SPIRV::OpImageQuerySamples: @@ -1976,6 +2051,8 @@ std::optional lowerBuiltin(const StringRef DemangledCall, return generateVectorLoadStoreInst(Call.get(), MIRBuilder, GR); case SPIRV::LoadStore: return generateLoadStoreInst(Call.get(), MIRBuilder, GR); + case SPIRV::IntelSubgroups: + return generateIntelSubgroupsInst(Call.get(), MIRBuilder, GR); } return false; } @@ -2119,6 +2196,7 @@ parseBuiltinTypeNameToTargetExtType(std::string TypeName, for (unsigned i = HasTypeParameter ? 1 : 0; i < Parameters.size(); i++) { unsigned IntParameter = 0; bool ValidLiteral = !Parameters[i].getAsInteger(10, IntParameter); + (void)ValidLiteral; assert(ValidLiteral && "Invalid format of SPIR-V builtin parameter literal!"); IntParameters.push_back(IntParameter); diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td index 8acd4691787e4..4013dd22f4ab5 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td @@ -54,6 +54,7 @@ def Enqueue : BuiltinGroup; def AsyncCopy : BuiltinGroup; def VectorLoadStore : BuiltinGroup; def LoadStore : BuiltinGroup; +def IntelSubgroups : BuiltinGroup; //===----------------------------------------------------------------------===// // Class defining a demangled builtin record. The information in the record @@ -625,7 +626,7 @@ def GroupBuiltins : GenericTable { "IsBallotFindBit", "IsLogical", "NoGroupOperation", "HasBoolArg"]; } -// Function to lookup native builtins by their name and set. +// Function to lookup group builtins by their name and set. def lookupGroupBuiltin : SearchIndex { let Table = GroupBuiltins; let Key = ["Name"]; @@ -871,6 +872,61 @@ defm : DemangledGroupBuiltin<"group_non_uniform_scan_inclusive_logical_xors", Wo defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_logical_xors", WorkOrSub, OpGroupNonUniformLogicalXor>; defm : DemangledGroupBuiltin<"group_clustered_reduce_logical_xor", WorkOrSub, OpGroupNonUniformLogicalXor>; +//===----------------------------------------------------------------------===// +// Class defining a sub group builtin that should be translated into a +// SPIR-V instruction using the SPV_INTEL_subgroups extension. +// +// name is the demangled name of the given builtin. +// opcode specifies the SPIR-V operation code of the generated instruction. +//===----------------------------------------------------------------------===// +class IntelSubgroupsBuiltin { + string Name = name; + Op Opcode = operation; + bit IsBlock = !or(!eq(operation, OpSubgroupBlockReadINTEL), + !eq(operation, OpSubgroupBlockWriteINTEL)); + bit IsWrite = !eq(operation, OpSubgroupBlockWriteINTEL); +} + +// Table gathering all the Intel sub group builtins. +def IntelSubgroupsBuiltins : GenericTable { + let FilterClass = "IntelSubgroupsBuiltin"; + let Fields = ["Name", "Opcode", "IsBlock", "IsWrite"]; +} + +// Function to lookup group builtins by their name and set. +def lookupIntelSubgroupsBuiltin : SearchIndex { + let Table = IntelSubgroupsBuiltins; + let Key = ["Name"]; +} + +// Multiclass used to define incoming builtin records for the SPV_INTEL_subgroups extension +// and corresponding work/sub group builtin records. +multiclass DemangledIntelSubgroupsBuiltin minNumArgs, bits<8> maxNumArgs, Op operation> { + def : DemangledBuiltin; + def : IntelSubgroupsBuiltin; +} + +// cl_intel_subgroups +defm : DemangledIntelSubgroupsBuiltin<"shuffle", 2, 2, OpSubgroupShuffleINTEL>; +defm : DemangledIntelSubgroupsBuiltin<"shuffle_down", 3, 3, OpSubgroupShuffleDownINTEL>; +defm : DemangledIntelSubgroupsBuiltin<"shuffle_up", 3, 3, OpSubgroupShuffleUpINTEL>; +defm : DemangledIntelSubgroupsBuiltin<"shuffle_xor", 2, 2, OpSubgroupShuffleXorINTEL>; +foreach i = ["", "2", "4", "8"] in { + // cl_intel_subgroups + defm : DemangledIntelSubgroupsBuiltin; + defm : DemangledIntelSubgroupsBuiltin; + // cl_intel_subgroups_short + defm : DemangledIntelSubgroupsBuiltin; + defm : DemangledIntelSubgroupsBuiltin; +} +// cl_intel_subgroups_char, cl_intel_subgroups_short, cl_intel_subgroups_long +foreach i = ["", "2", "4", "8", "16"] in { + foreach j = ["c", "s", "l"] in { + defm : DemangledIntelSubgroupsBuiltin; + defm : DemangledIntelSubgroupsBuiltin; + } +} +// OpSubgroupImageBlockReadINTEL and OpSubgroupImageBlockWriteINTEL are to be resolved later on (in code) //===----------------------------------------------------------------------===// // Class defining a get builtin record used for lowering builtin calls such as diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td index da033ba32624c..caf2ae43480b1 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td @@ -761,3 +761,21 @@ def OpGroupNonUniformBitwiseXor: OpGroupNUGroup<"BitwiseXor", 361>; def OpGroupNonUniformLogicalAnd: OpGroupNUGroup<"LogicalAnd", 362>; def OpGroupNonUniformLogicalOr: OpGroupNUGroup<"LogicalOr", 363>; def OpGroupNonUniformLogicalXor: OpGroupNUGroup<"LogicalXor", 364>; + +// 3.49.21. Group and Subgroup Instructions +def OpSubgroupShuffleINTEL: Op<5571, (outs ID:$res), (ins TYPE:$type, ID:$data, ID:$invocationId), + "$res = OpSubgroupShuffleINTEL $type $data $invocationId">; +def OpSubgroupShuffleDownINTEL: Op<5572, (outs ID:$res), (ins TYPE:$type, ID:$current, ID:$next, ID:$delta), + "$res = OpSubgroupShuffleDownINTEL $type $current $next $delta">; +def OpSubgroupShuffleUpINTEL: Op<5573, (outs ID:$res), (ins TYPE:$type, ID:$previous, ID:$current, ID:$delta), + "$res = OpSubgroupShuffleUpINTEL $type $previous $current $delta">; +def OpSubgroupShuffleXorINTEL: Op<5574, (outs ID:$res), (ins TYPE:$type, ID:$data, ID:$value), + "$res = OpSubgroupShuffleXorINTEL $type $data $value">; +def OpSubgroupBlockReadINTEL: Op<5575, (outs ID:$res), (ins TYPE:$type, ID:$ptr), + "$res = OpSubgroupBlockReadINTEL $type $ptr">; +def OpSubgroupBlockWriteINTEL: Op<5576, (outs), (ins ID:$ptr, ID:$data), + "OpSubgroupBlockWriteINTEL $ptr $data">; +def OpSubgroupImageBlockReadINTEL: Op<5577, (outs ID:$res), (ins TYPE:$type, ID:$image, ID:$coordinate), + "$res = OpSubgroupImageBlockReadINTEL $type $image $coordinate">; +def OpSubgroupImageBlockWriteINTEL: Op<5578, (outs), (ins ID:$image, ID:$coordinate, ID:$data), + "OpSubgroupImageBlockWriteINTEL $image $coordinate $data">; diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index 370da046984f9..2dfb71dad193a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -908,6 +908,29 @@ void addInstrRequirements(const MachineInstr &MI, case SPIRV::OpGroupNonUniformBallotFindMSB: Reqs.addCapability(SPIRV::Capability::GroupNonUniformBallot); break; + case SPIRV::OpSubgroupShuffleINTEL: + case SPIRV::OpSubgroupShuffleDownINTEL: + case SPIRV::OpSubgroupShuffleUpINTEL: + case SPIRV::OpSubgroupShuffleXorINTEL: + if (ST.canUseExtension(SPIRV::Extension::SPV_INTEL_subgroups)) { + Reqs.addExtension(SPIRV::Extension::SPV_INTEL_subgroups); + Reqs.addCapability(SPIRV::Capability::SubgroupShuffleINTEL); + } + break; + case SPIRV::OpSubgroupBlockReadINTEL: + case SPIRV::OpSubgroupBlockWriteINTEL: + if (ST.canUseExtension(SPIRV::Extension::SPV_INTEL_subgroups)) { + Reqs.addExtension(SPIRV::Extension::SPV_INTEL_subgroups); + Reqs.addCapability(SPIRV::Capability::SubgroupBufferBlockIOINTEL); + } + break; + case SPIRV::OpSubgroupImageBlockReadINTEL: + case SPIRV::OpSubgroupImageBlockWriteINTEL: + if (ST.canUseExtension(SPIRV::Extension::SPV_INTEL_subgroups)) { + Reqs.addExtension(SPIRV::Extension::SPV_INTEL_subgroups); + Reqs.addCapability(SPIRV::Capability::SubgroupImageBlockIOINTEL); + } + break; case SPIRV::OpAssumeTrueKHR: case SPIRV::OpExpectKHR: if (ST.canUseExtension(SPIRV::Extension::SPV_KHR_expect_assume)) { diff --git a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp index cf6dfb127cdeb..6eb81f2deb3ab 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp @@ -37,6 +37,11 @@ cl::list Extensions( clEnumValN(SPIRV::Extension::SPV_INTEL_optnone, "SPV_INTEL_optnone", "Adds OptNoneINTEL value for Function Control mask that " "indicates a request to not optimize the function"), + clEnumValN(SPIRV::Extension::SPV_INTEL_subgroups, "SPV_INTEL_subgroups", + "Allows work items in a subgroup to share data without the " + "use of local memory and work group barriers, and to " + "utilize specialized hardware to load and store blocks of " + "data from images or buffers."), clEnumValN(SPIRV::Extension::SPV_KHR_no_integer_wrap_decoration, "SPV_KHR_no_integer_wrap_decoration", "Adds decorations to indicate that a given instruction does " diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index ac92ee4a0756a..58ba7781b7777 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -431,9 +431,9 @@ defm InputAttachmentArrayNonUniformIndexingEXT : CapabilityOperand<5310, 0, 0, [ defm UniformTexelBufferArrayNonUniformIndexingEXT : CapabilityOperand<5311, 0, 0, [], [SampledBuffer, ShaderNonUniformEXT]>; defm StorageTexelBufferArrayNonUniformIndexingEXT : CapabilityOperand<5312, 0, 0, [], [ImageBuffer, ShaderNonUniformEXT]>; defm RayTracingNV : CapabilityOperand<5340, 0, 0, [], [Shader]>; -defm SubgroupShuffleINTEL : CapabilityOperand<5568, 0, 0, [], []>; -defm SubgroupBufferBlockIOINTEL : CapabilityOperand<5569, 0, 0, [], []>; -defm SubgroupImageBlockIOINTEL : CapabilityOperand<5570, 0, 0, [], []>; +defm SubgroupShuffleINTEL : CapabilityOperand<5568, 0, 0, [SPV_INTEL_subgroups], []>; +defm SubgroupBufferBlockIOINTEL : CapabilityOperand<5569, 0, 0, [SPV_INTEL_subgroups], []>; +defm SubgroupImageBlockIOINTEL : CapabilityOperand<5570, 0, 0, [SPV_INTEL_subgroups], []>; defm SubgroupImageMediaBlockIOINTEL : CapabilityOperand<5579, 0, 0, [], []>; defm SubgroupAvcMotionEstimationINTEL : CapabilityOperand<5696, 0, 0, [], []>; defm SubgroupAvcMotionEstimationIntraINTEL : CapabilityOperand<5697, 0, 0, [], []>; diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll new file mode 100644 index 0000000000000..0e0b2a4dd6ec2 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll @@ -0,0 +1,189 @@ +; Modified from: https://github.com/KhronosGroup/SPIRV-LLVM-Translator/test/extensions/INTEL/SPV_INTEL_subgroups/cl_intel_sub_groups.ll + +;Source: +;void __kernel test(float2 x, uint c, +; read_only image2d_t image_in, +; write_only image2d_t image_out, +; int2 coord, +; __global uint* p, +; __global ushort* sp, +; __global uchar* cp, +; __global ulong* lp) { +; intel_sub_group_shuffle(x, c); +; intel_sub_group_shuffle_down(x, x, c); +; intel_sub_group_shuffle_up(x, x, c); +; intel_sub_group_shuffle_xor(x, c); +; +; uint2 ui2 = intel_sub_group_block_read2(image_in, coord); +; intel_sub_group_block_write2(image_out, coord, ui2); +; ui2 = intel_sub_group_block_read2(p); +; intel_sub_group_block_write2(p, ui2); +; +; ushort2 us2 = intel_sub_group_block_read_us2(image_in, coord); +; intel_sub_group_block_write_us2(image_out, coord, us2); +; us2 = intel_sub_group_block_read_us2(sp); +; intel_sub_group_block_write_us2(sp, us2); +; +; uchar2 uc2 = intel_sub_group_block_read_uc2(image_in, coord); +; intel_sub_group_block_write_uc2(image_out, coord, uc2); +; uc2 = intel_sub_group_block_read_uc2(cp); +; intel_sub_group_block_write_uc2(cp, uc2); +; +; ulong2 ul2 = intel_sub_group_block_read_ul2(image_in, coord); +; intel_sub_group_block_write_ul2(image_out, coord, ul2); +; ul2 = intel_sub_group_block_read_ul2(lp); +; intel_sub_group_block_write_ul2(lp, ul2); +;} + +; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR + +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-extensions=SPV_INTEL_subgroups %s -o - | FileCheck %s + +; CHECK-ERROR: LLVM ERROR: intel_sub_group_shuffle: the builtin requires the following SPIR-V extension: SPV_INTEL_subgroups + +; CHECK-DAG: Capability SubgroupShuffleINTEL +; CHECK-DAG: Capability SubgroupBufferBlockIOINTEL +; CHECK-DAG: Capability SubgroupImageBlockIOINTEL +; CHECK: Extension "SPV_INTEL_subgroups" + +; CHECK-SPIRV-LABEL: Function +; CHECK-SPIRV-LABEL: Label + +; CHECK: SubgroupShuffleINTEL +; CHECK: SubgroupShuffleDownINTEL +; CHECK: SubgroupShuffleUpINTEL +; CHECK: SubgroupShuffleXorINTEL + +; CHECK: SubgroupImageBlockReadINTEL +; CHECK: SubgroupImageBlockWriteINTEL +; CHECK: SubgroupBlockReadINTEL +; CHECK: SubgroupBlockWriteINTEL + +; CHECK: SubgroupImageBlockReadINTEL +; CHECK: SubgroupImageBlockWriteINTEL +; CHECK: SubgroupBlockReadINTEL +; CHECK: SubgroupBlockWriteINTEL + +; CHECK: SubgroupImageBlockReadINTEL +; CHECK: SubgroupImageBlockWriteINTEL +; CHECK: SubgroupBlockReadINTEL +; CHECK: SubgroupBlockWriteINTEL + +; CHECK: SubgroupImageBlockReadINTEL +; CHECK: SubgroupImageBlockWriteINTEL +; CHECK: SubgroupBlockReadINTEL +; CHECK: SubgroupBlockWriteINTEL + +; CHECK-SPIRV-LABEL: Return + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64" + +%opencl.image2d_ro_t = type opaque +%opencl.image2d_wo_t = type opaque + +; Function Attrs: convergent nounwind +define spir_kernel void @test(<2 x float> %x, i32 %c, ptr addrspace(1) %image_in, ptr addrspace(1) %image_out, <2 x i32> %coord, ptr addrspace(1) %p, ptr addrspace(1) %sp, ptr addrspace(1) %cp, ptr addrspace(1) %lp) local_unnamed_addr #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 !kernel_arg_name !6 { +entry: + %call = tail call spir_func <2 x float> @_Z23intel_sub_group_shuffleDv2_fj(<2 x float> %x, i32 %c) #2 + %call1 = tail call spir_func <2 x float> @_Z28intel_sub_group_shuffle_downDv2_fS_j(<2 x float> %x, <2 x float> %x, i32 %c) #2 + %call2 = tail call spir_func <2 x float> @_Z26intel_sub_group_shuffle_upDv2_fS_j(<2 x float> %x, <2 x float> %x, i32 %c) #2 + %call3 = tail call spir_func <2 x float> @_Z27intel_sub_group_shuffle_xorDv2_fj(<2 x float> %x, i32 %c) #2 + + %call4 = tail call spir_func <2 x i32> @_Z27intel_sub_group_block_read214ocl_image2d_roDv2_i(ptr addrspace(1) %image_in, <2 x i32> %coord) #2 + tail call spir_func void @_Z28intel_sub_group_block_write214ocl_image2d_woDv2_iDv2_j(ptr addrspace(1) %image_out, <2 x i32> %coord, <2 x i32> %call4) #2 + %call5 = tail call spir_func <2 x i32> @_Z27intel_sub_group_block_read2PU3AS1Kj(ptr addrspace(1) %p) #2 + tail call spir_func void @_Z28intel_sub_group_block_write2PU3AS1jDv2_j(ptr addrspace(1) %p, <2 x i32> %call5) #2 + + %call6 = tail call spir_func <2 x i16> @_Z30intel_sub_group_block_read_us214ocl_image2d_roDv2_i(ptr addrspace(1) %image_in, <2 x i32> %coord) #2 + tail call spir_func void @_Z31intel_sub_group_block_write_us214ocl_image2d_woDv2_iDv2_t(ptr addrspace(1) %image_out, <2 x i32> %coord, <2 x i16> %call6) #2 + %call7 = tail call spir_func <2 x i16> @_Z30intel_sub_group_block_read_us2PU3AS1Kt(ptr addrspace(1) %sp) #2 + tail call spir_func void @_Z31intel_sub_group_block_write_us2PU3AS1tDv2_t(ptr addrspace(1) %sp, <2 x i16> %call7) #2 + + %call8 = tail call spir_func <2 x i8> @_Z30intel_sub_group_block_read_uc214ocl_image2d_roDv2_i(ptr addrspace(1) %image_in, <2 x i32> %coord) #2 + tail call spir_func void @_Z31intel_sub_group_block_write_uc214ocl_image2d_woDv2_iDv2_h(ptr addrspace(1) %image_out, <2 x i32> %coord, <2 x i8> %call8) #2 + %call9 = tail call spir_func <2 x i8> @_Z30intel_sub_group_block_read_uc2PU3AS1Kh(ptr addrspace(1) %cp) #2 + tail call spir_func void @_Z31intel_sub_group_block_write_uc2PU3AS1hDv2_h(ptr addrspace(1) %cp, <2 x i8> %call9) #2 + + %call10 = tail call spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul214ocl_image2d_roDv2_i(ptr addrspace(1) %image_in, <2 x i32> %coord) #2 + tail call spir_func void @_Z31intel_sub_group_block_write_ul214ocl_image2d_woDv2_iDv2_m(ptr addrspace(1) %image_out, <2 x i32> %coord, <2 x i64> %call10) #2 + %call11 = tail call spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(ptr addrspace(1) %lp) #2 + tail call spir_func void @_Z31intel_sub_group_block_write_ul2PU3AS1mDv2_m(ptr addrspace(1) %lp, <2 x i64> %call11) #2 + + ret void +} + +; Function Attrs: convergent +declare spir_func <2 x float> @_Z23intel_sub_group_shuffleDv2_fj(<2 x float>, i32) local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func <2 x float> @_Z28intel_sub_group_shuffle_downDv2_fS_j(<2 x float>, <2 x float>, i32) local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func <2 x float> @_Z26intel_sub_group_shuffle_upDv2_fS_j(<2 x float>, <2 x float>, i32) local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func <2 x float> @_Z27intel_sub_group_shuffle_xorDv2_fj(<2 x float>, i32) local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func <2 x i32> @_Z27intel_sub_group_block_read214ocl_image2d_roDv2_i(ptr addrspace(1), <2 x i32>) local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func void @_Z28intel_sub_group_block_write214ocl_image2d_woDv2_iDv2_j(ptr addrspace(1), <2 x i32>, <2 x i32>) local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func <2 x i32> @_Z27intel_sub_group_block_read2PU3AS1Kj(ptr addrspace(1)) local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func void @_Z28intel_sub_group_block_write2PU3AS1jDv2_j(ptr addrspace(1), <2 x i32>) local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func <2 x i16> @_Z30intel_sub_group_block_read_us214ocl_image2d_roDv2_i(ptr addrspace(1), <2 x i32>) local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func void @_Z31intel_sub_group_block_write_us214ocl_image2d_woDv2_iDv2_t(ptr addrspace(1), <2 x i32>, <2 x i16>) local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func <2 x i16> @_Z30intel_sub_group_block_read_us2PU3AS1Kt(ptr addrspace(1)) local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func void @_Z31intel_sub_group_block_write_us2PU3AS1tDv2_t(ptr addrspace(1), <2 x i16>) local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func <2 x i8> @_Z30intel_sub_group_block_read_uc214ocl_image2d_roDv2_i(ptr addrspace(1), <2 x i32>) local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func void @_Z31intel_sub_group_block_write_uc214ocl_image2d_woDv2_iDv2_h(ptr addrspace(1), <2 x i32>, <2 x i8>) local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func <2 x i8> @_Z30intel_sub_group_block_read_uc2PU3AS1Kh(ptr addrspace(1)) local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func void @_Z31intel_sub_group_block_write_uc2PU3AS1hDv2_h(ptr addrspace(1), <2 x i8>) local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul214ocl_image2d_roDv2_i(ptr addrspace(1), <2 x i32>) local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func void @_Z31intel_sub_group_block_write_ul214ocl_image2d_woDv2_iDv2_m(ptr addrspace(1), <2 x i32>, <2 x i64>) local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(ptr addrspace(1)) local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func void @_Z31intel_sub_group_block_write_ul2PU3AS1mDv2_m(ptr addrspace(1), <2 x i64>) local_unnamed_addr #1 + +attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { convergent "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { convergent nounwind } + +!opencl.ocl.version = !{!0} +!opencl.spir.version = !{!0} + +!0 = !{i32 1, i32 2} +!1 = !{i32 0, i32 0, i32 1, i32 1, i32 0, i32 1, i32 1, i32 1, i32 1} +!2 = !{!"none", !"none", !"read_only", !"write_only", !"none", !"none", !"none", !"none", !"none"} +!3 = !{!"float2", !"uint", !"image2d_t", !"image2d_t", !"int2", !"uint*", !"ushort*", !"uchar*", !"ulong*"} +!4 = !{!"float __attribute__((ext_vector_type(2)))", !"uint", !"image2d_t", !"image2d_t", !"int __attribute__((ext_vector_type(2)))", !"uint*", !"ushort*", !"uchar*", !"ulong*"} +!5 = !{!"", !"", !"", !"", !"", !"", !"", !"", !""} +!6 = !{!"x", !"c", !"image_in", !"image_out", !"coord", !"p", !"sp", !"cp", !"lp"}