diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td index 571cfcfd6e7e5..bcd27c82439f9 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td @@ -619,7 +619,8 @@ class GroupBuiltin { !eq(operation, OpGroupNonUniformShuffleDown), !eq(operation, OpGroupBroadcast), !eq(operation, OpGroupNonUniformBroadcast), - !eq(operation, OpGroupNonUniformBroadcastFirst)); + !eq(operation, OpGroupNonUniformBroadcastFirst), + !eq(operation, OpGroupNonUniformRotateKHR)); bit HasBoolArg = !or(!and(IsAllOrAny, !eq(IsAllEqual, false)), IsBallot, IsLogical); } @@ -877,6 +878,10 @@ defm : DemangledGroupBuiltin<"group_non_uniform_scan_inclusive_logical_xors", Wo defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_logical_xors", WorkOrSub, OpGroupNonUniformLogicalXor>; defm : DemangledGroupBuiltin<"group_clustered_reduce_logical_xor", WorkOrSub, OpGroupNonUniformLogicalXor>; +// cl_khr_subgroup_rotate / SPV_KHR_subgroup_rotate +defm : DemangledGroupBuiltin<"group_rotate", OnlySub, OpGroupNonUniformRotateKHR>; +defm : DemangledGroupBuiltin<"group_clustered_rotate", OnlySub, OpGroupNonUniformRotateKHR>; + // cl_khr_work_group_uniform_arithmetic / SPV_KHR_uniform_group_instructions defm : DemangledGroupBuiltin<"group_reduce_imul", OnlyWork, OpGroupIMulKHR>; defm : DemangledGroupBuiltin<"group_reduce_mulu", OnlyWork, OpGroupIMulKHR>; diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td index 0f11bc34d176f..86f65b6320d53 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td @@ -765,6 +765,11 @@ def OpGroupNonUniformLogicalAnd: OpGroupNUGroup<"LogicalAnd", 362>; def OpGroupNonUniformLogicalOr: OpGroupNUGroup<"LogicalOr", 363>; def OpGroupNonUniformLogicalXor: OpGroupNUGroup<"LogicalXor", 364>; +// SPV_KHR_subgroup_rotate +def OpGroupNonUniformRotateKHR: Op<4431, (outs ID:$res), + (ins TYPE:$type, ID:$scope, ID:$value, ID:$delta, variable_ops), + "$res = OpGroupNonUniformRotateKHR $type $scope $value $delta">; + // 3.49.7, Constant-Creation Instructions // - SPV_INTEL_function_pointers diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index dbda2871e153d..9b9575b987994 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -1069,6 +1069,15 @@ void addInstrRequirements(const MachineInstr &MI, Reqs.addCapability(SPIRV::Capability::FunctionPointersINTEL); } break; + case SPIRV::OpGroupNonUniformRotateKHR: + if (!ST.canUseExtension(SPIRV::Extension::SPV_KHR_subgroup_rotate)) + report_fatal_error("OpGroupNonUniformRotateKHR instruction requires the " + "following SPIR-V extension: SPV_KHR_subgroup_rotate", + false); + Reqs.addExtension(SPIRV::Extension::SPV_KHR_subgroup_rotate); + Reqs.addCapability(SPIRV::Capability::GroupNonUniformRotateKHR); + Reqs.addCapability(SPIRV::Capability::GroupNonUniform); + break; case SPIRV::OpGroupIMulKHR: case SPIRV::OpGroupFMulKHR: case SPIRV::OpGroupBitwiseAndKHR: diff --git a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp index e186154aa408b..4694363614ef6 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp @@ -75,6 +75,10 @@ cl::list Extensions( "Allows to use the LinkOnceODR linkage type that is to let " "a function or global variable to be merged with other functions " "or global variables of the same name when linkage occurs."), + clEnumValN(SPIRV::Extension::SPV_KHR_subgroup_rotate, + "SPV_KHR_subgroup_rotate", + "Adds a new instruction that enables rotating values across " + "invocations within a subgroup."), clEnumValN(SPIRV::Extension::SPV_INTEL_function_pointers, "SPV_INTEL_function_pointers", "Allows translation of function pointers."))); diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index 4e5ac0d531b2d..6c36087baa85e 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -455,6 +455,7 @@ defm BitInstructions : CapabilityOperand<6025, 0, 0, [SPV_KHR_bit_instructions], defm ExpectAssumeKHR : CapabilityOperand<5629, 0, 0, [SPV_KHR_expect_assume], []>; defm FunctionPointersINTEL : CapabilityOperand<5603, 0, 0, [SPV_INTEL_function_pointers], []>; defm IndirectReferencesINTEL : CapabilityOperand<5604, 0, 0, [SPV_INTEL_function_pointers], []>; +defm GroupNonUniformRotateKHR : CapabilityOperand<6026, 0, 0, [SPV_KHR_subgroup_rotate], [GroupNonUniform]>; defm AtomicFloat32AddEXT : CapabilityOperand<6033, 0, 0, [SPV_EXT_shader_atomic_float_add], []>; defm AtomicFloat64AddEXT : CapabilityOperand<6034, 0, 0, [SPV_EXT_shader_atomic_float_add], []>; defm AtomicFloat16AddEXT : CapabilityOperand<6095, 0, 0, [SPV_EXT_shader_atomic_float16_add], []>; diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_subgroup_rotate/subgroup-rotate.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_subgroup_rotate/subgroup-rotate.ll new file mode 100644 index 0000000000000..b1d6a09c7fe35 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_subgroup_rotate/subgroup-rotate.ll @@ -0,0 +1,357 @@ +; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-extensions=SPV_KHR_subgroup_rotate %s -o - | FileCheck %s +; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-extensions=SPV_KHR_subgroup_rotate %s -o - -filetype=obj | spirv-val %} + +; CHECK-ERROR: LLVM ERROR: OpGroupNonUniformRotateKHR instruction requires the following SPIR-V extension: SPV_KHR_subgroup_rotate + +; CHECK: OpCapability GroupNonUniformRotateKHR +; CHECK: OpExtension "SPV_KHR_subgroup_rotate" + +; CHECK-DAG: %[[TyInt8:.*]] = OpTypeInt 8 0 +; CHECK-DAG: %[[TyInt16:.*]] = OpTypeInt 16 0 +; CHECK-DAG: %[[TyInt32:.*]] = OpTypeInt 32 0 +; CHECK-DAG: %[[TyInt64:.*]] = OpTypeInt 64 0 +; CHECK-DAG: %[[TyFloat:.*]] = OpTypeFloat 32 +; CHECK-DAG: %[[TyHalf:.*]] = OpTypeFloat 16 +; CHECK-DAG: %[[TyDouble:.*]] = OpTypeFloat 64 +; CHECK-DAG: %[[ScopeSubgroup:.*]] = OpConstant %[[TyInt32]] 3 +; CHECK-DAG: %[[ConstInt2:.*]] = OpConstant %[[TyInt32]] 2 +; CHECK-DAG: %[[ConstInt4:.*]] = OpConstant %[[TyInt32]] 4 + +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir" + +; Function Attrs: convergent noinline norecurse nounwind optnone +define dso_local spir_kernel void @testRotateChar(ptr addrspace(1) noundef align 1 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { +entry: + %dst.addr = alloca ptr addrspace(1), align 4 + %v = alloca i8, align 1 + store ptr addrspace(1) %dst, ptr %dst.addr, align 4 + store i8 0, ptr %v, align 1 + %value = load i8, ptr %v, align 1 +; CHECK: OpGroupNonUniformRotateKHR %[[TyInt8]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] + %call = call spir_func signext i8 @_Z16sub_group_rotateci(i8 noundef signext %value, i32 noundef 2) #2 + %data = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx = getelementptr inbounds i8, ptr addrspace(1) %data, i32 0 + store i8 %call, ptr addrspace(1) %arrayidx, align 1 + %value_clustered = load i8, ptr %v, align 1 +; CHECK: OpGroupNonUniformRotateKHR %[[TyInt8]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] %[[ConstInt4]] + %call1 = call spir_func signext i8 @_Z26sub_group_clustered_rotatecij(i8 noundef signext %value_clustered, i32 noundef 2, i32 noundef 4) #2 + %data2 = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx2 = getelementptr inbounds i8, ptr addrspace(1) %data2, i32 1 + store i8 %call1, ptr addrspace(1) %arrayidx2, align 1 + ret void +} + +; Function Attrs: convergent nounwind +declare spir_func signext i8 @_Z16sub_group_rotateci(i8 noundef signext, i32 noundef) #1 + +; Function Attrs: convergent nounwind +declare spir_func signext i8 @_Z26sub_group_clustered_rotatecij(i8 noundef signext, i32 noundef, i32 noundef) #1 + +; Function Attrs: convergent noinline norecurse nounwind optnone +define dso_local spir_kernel void @testRotateUChar(ptr addrspace(1) noundef align 1 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !6 { +entry: + %dst.addr = alloca ptr addrspace(1), align 4 + %v = alloca i8, align 1 + store ptr addrspace(1) %dst, ptr %dst.addr, align 4 + store i8 0, ptr %v, align 1 + %value = load i8, ptr %v, align 1 +; CHECK: OpGroupNonUniformRotateKHR %[[TyInt8]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] + %call = call spir_func zeroext i8 @_Z16sub_group_rotatehi(i8 noundef zeroext %value, i32 noundef 2) #2 + %data = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx = getelementptr inbounds i8, ptr addrspace(1) %data, i32 0 + store i8 %call, ptr addrspace(1) %arrayidx, align 1 + %value_clustered = load i8, ptr %v, align 1 +; CHECK: OpGroupNonUniformRotateKHR %[[TyInt8]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] %[[ConstInt4]] + %call1 = call spir_func zeroext i8 @_Z26sub_group_clustered_rotatehij(i8 noundef zeroext %value_clustered, i32 noundef 2, i32 noundef 4) #2 + %data2 = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx2 = getelementptr inbounds i8, ptr addrspace(1) %data2, i32 1 + store i8 %call1, ptr addrspace(1) %arrayidx2, align 1 + ret void +} + +; Function Attrs: convergent nounwind +declare spir_func zeroext i8 @_Z16sub_group_rotatehi(i8 noundef zeroext, i32 noundef) #1 + +; Function Attrs: convergent nounwind +declare spir_func zeroext i8 @_Z26sub_group_clustered_rotatehij(i8 noundef zeroext, i32 noundef, i32 noundef) #1 + +; Function Attrs: convergent noinline norecurse nounwind optnone +define dso_local spir_kernel void @testRotateShort(ptr addrspace(1) noundef align 2 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !8 !kernel_arg_base_type !8 !kernel_arg_type_qual !6 { +entry: + %dst.addr = alloca ptr addrspace(1), align 4 + %v = alloca i16, align 2 + store ptr addrspace(1) %dst, ptr %dst.addr, align 4 + store i16 0, ptr %v, align 2 + %value = load i16, ptr %v, align 2 + ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt16]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] + %call = call spir_func signext i16 @_Z16sub_group_rotatesi(i16 noundef signext %value, i32 noundef 2) #2 + %data = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx = getelementptr inbounds i16, ptr addrspace(1) %data, i32 0 + store i16 %call, ptr addrspace(1) %arrayidx, align 2 + %value_clustered = load i16, ptr %v, align 2 + ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt16]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] %[[ConstInt4]] + %call1 = call spir_func signext i16 @_Z26sub_group_clustered_rotatesij(i16 noundef signext %value_clustered, i32 noundef 2, i32 noundef 4) #2 + %data2 = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx2 = getelementptr inbounds i16, ptr addrspace(1) %data2, i32 1 + store i16 %call1, ptr addrspace(1) %arrayidx2, align 2 + ret void +} + +; Function Attrs: convergent nounwind +declare spir_func signext i16 @_Z16sub_group_rotatesi(i16 noundef signext, i32 noundef) #1 + +; Function Attrs: convergent nounwind +declare spir_func signext i16 @_Z26sub_group_clustered_rotatesij(i16 noundef signext, i32 noundef, i32 noundef) #1 + +; Function Attrs: convergent noinline norecurse nounwind optnone +define dso_local spir_kernel void @testRotateUShort(ptr addrspace(1) noundef align 2 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !6 { +entry: + %dst.addr = alloca ptr addrspace(1), align 4 + %v = alloca i16, align 2 + store ptr addrspace(1) %dst, ptr %dst.addr, align 4 + store i16 0, ptr %v, align 2 + %value = load i16, ptr %v, align 2 + ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt16]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] + %call = call spir_func zeroext i16 @_Z16sub_group_rotateti(i16 noundef zeroext %value, i32 noundef 2) #2 + %data = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx = getelementptr inbounds i16, ptr addrspace(1) %data, i32 0 + store i16 %call, ptr addrspace(1) %arrayidx, align 2 + %value_clustered = load i16, ptr %v, align 2 + ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt16]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] %[[ConstInt4]] + %call1 = call spir_func zeroext i16 @_Z26sub_group_clustered_rotatetij(i16 noundef zeroext %value_clustered, i32 noundef 2, i32 noundef 4) #2 + %data2 = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx2 = getelementptr inbounds i16, ptr addrspace(1) %data2, i32 1 + store i16 %call1, ptr addrspace(1) %arrayidx2, align 2 + ret void +} + +; Function Attrs: convergent nounwind +declare spir_func zeroext i16 @_Z16sub_group_rotateti(i16 noundef zeroext, i32 noundef) #1 + +; Function Attrs: convergent nounwind +declare spir_func zeroext i16 @_Z26sub_group_clustered_rotatetij(i16 noundef zeroext, i32 noundef, i32 noundef) #1 + +; Function Attrs: convergent noinline norecurse nounwind optnone +define dso_local spir_kernel void @testRotateInt(ptr addrspace(1) noundef align 4 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !6 { +entry: + %dst.addr = alloca ptr addrspace(1), align 4 + %v = alloca i32, align 4 + store ptr addrspace(1) %dst, ptr %dst.addr, align 4 + store i32 0, ptr %v, align 4 + %value = load i32, ptr %v, align 4 + ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt32]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] + %call = call spir_func i32 @_Z16sub_group_rotateii(i32 noundef %value, i32 noundef 2) #2 + %data = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %data, i32 0 + store i32 %call, ptr addrspace(1) %arrayidx, align 4 + %value_clustered = load i32, ptr %v, align 4 + ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt32]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] %[[ConstInt4]] + %call1 = call spir_func i32 @_Z26sub_group_clustered_rotateiij(i32 noundef %value_clustered, i32 noundef 2, i32 noundef 4) #2 + %data2 = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %data2, i32 1 + store i32 %call1, ptr addrspace(1) %arrayidx2, align 4 + ret void +} + +; Function Attrs: convergent nounwind +declare spir_func i32 @_Z16sub_group_rotateii(i32 noundef, i32 noundef) #1 + +; Function Attrs: convergent nounwind +declare spir_func i32 @_Z26sub_group_clustered_rotateiij(i32 noundef, i32 noundef, i32 noundef) #1 + +; Function Attrs: convergent noinline norecurse nounwind optnone +define dso_local spir_kernel void @testRotateUInt(ptr addrspace(1) noundef align 4 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !6 { +entry: + %dst.addr = alloca ptr addrspace(1), align 4 + %v = alloca i32, align 4 + store ptr addrspace(1) %dst, ptr %dst.addr, align 4 + store i32 0, ptr %v, align 4 + %value = load i32, ptr %v, align 4 + ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt32]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] + %call = call spir_func i32 @_Z16sub_group_rotateji(i32 noundef %value, i32 noundef 2) #2 + %data = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %data, i32 0 + store i32 %call, ptr addrspace(1) %arrayidx, align 4 + %value_clustered = load i32, ptr %v, align 4 + ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt32]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] %[[ConstInt4]] + %call1 = call spir_func i32 @_Z26sub_group_clustered_rotatejij(i32 noundef %value_clustered, i32 noundef 2, i32 noundef 4) #2 + %data2 = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %data2, i32 1 + store i32 %call1, ptr addrspace(1) %arrayidx2, align 4 + ret void +} + +; Function Attrs: convergent nounwind +declare spir_func i32 @_Z16sub_group_rotateji(i32 noundef, i32 noundef) #1 + +; Function Attrs: convergent nounwind +declare spir_func i32 @_Z26sub_group_clustered_rotatejij(i32 noundef, i32 noundef, i32 noundef) #1 + +; Function Attrs: convergent noinline norecurse nounwind optnone +define dso_local spir_kernel void @testRotateLong(ptr addrspace(1) noundef align 8 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !6 { +entry: + %dst.addr = alloca ptr addrspace(1), align 4 + %v = alloca i64, align 8 + store ptr addrspace(1) %dst, ptr %dst.addr, align 4 + store i64 0, ptr %v, align 8 + %value = load i64, ptr %v, align 8 + ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt64]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] + %call = call spir_func i64 @_Z16sub_group_rotateli(i64 noundef %value, i32 noundef 2) #2 + %data = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx = getelementptr inbounds i64, ptr addrspace(1) %data, i32 0 + store i64 %call, ptr addrspace(1) %arrayidx, align 8 + %value_clustered = load i64, ptr %v, align 8 + ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt64]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] %[[ConstInt4]] + %call1 = call spir_func i64 @_Z26sub_group_clustered_rotatelij(i64 noundef %value_clustered, i32 noundef 2, i32 noundef 4) #2 + %data2 = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx2 = getelementptr inbounds i64, ptr addrspace(1) %data2, i32 1 + store i64 %call1, ptr addrspace(1) %arrayidx2, align 8 + ret void +} + +; Function Attrs: convergent nounwind +declare spir_func i64 @_Z16sub_group_rotateli(i64 noundef, i32 noundef) #1 + +; Function Attrs: convergent nounwind +declare spir_func i64 @_Z26sub_group_clustered_rotatelij(i64 noundef, i32 noundef, i32 noundef) #1 + +; Function Attrs: convergent noinline norecurse nounwind optnone +define dso_local spir_kernel void @testRotateULong(ptr addrspace(1) noundef align 8 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !6 { +entry: + %dst.addr = alloca ptr addrspace(1), align 4 + %v = alloca i64, align 8 + store ptr addrspace(1) %dst, ptr %dst.addr, align 4 + store i64 0, ptr %v, align 8 + %value = load i64, ptr %v, align 8 + ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt64]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] + %call = call spir_func i64 @_Z16sub_group_rotatemi(i64 noundef %value, i32 noundef 2) #2 + %data = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx = getelementptr inbounds i64, ptr addrspace(1) %data, i32 0 + store i64 %call, ptr addrspace(1) %arrayidx, align 8 + %value_clustered = load i64, ptr %v, align 8 + ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt64]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] %[[ConstInt4]] + %call1 = call spir_func i64 @_Z26sub_group_clustered_rotatemij(i64 noundef %value_clustered, i32 noundef 2, i32 noundef 4) #2 + %data2 = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx2 = getelementptr inbounds i64, ptr addrspace(1) %data2, i32 1 + store i64 %call1, ptr addrspace(1) %arrayidx2, align 8 + ret void +} + +; Function Attrs: convergent nounwind +declare spir_func i64 @_Z16sub_group_rotatemi(i64 noundef, i32 noundef) #1 + +; Function Attrs: convergent nounwind +declare spir_func i64 @_Z26sub_group_clustered_rotatemij(i64 noundef, i32 noundef, i32 noundef) #1 + +; Function Attrs: convergent noinline norecurse nounwind optnone +define dso_local spir_kernel void @testRotateFloat(ptr addrspace(1) noundef align 4 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !6 { +entry: + %dst.addr = alloca ptr addrspace(1), align 4 + %v = alloca float, align 4 + store ptr addrspace(1) %dst, ptr %dst.addr, align 4 + store float 0.000000e+00, ptr %v, align 4 + %value = load float, ptr %v, align 4 + ; CHECK: OpGroupNonUniformRotateKHR %[[TyFloat]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] + %call = call spir_func float @_Z16sub_group_rotatefi(float noundef %value, i32 noundef 2) #2 + %data = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %data, i32 0 + store float %call, ptr addrspace(1) %arrayidx, align 4 + %value_clustered = load float, ptr %v, align 4 + ; CHECK: OpGroupNonUniformRotateKHR %[[TyFloat]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] %[[ConstInt4]] + %call1 = call spir_func float @_Z26sub_group_clustered_rotatefij(float noundef %value_clustered, i32 noundef 2, i32 noundef 4) #2 + %data2 = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx2 = getelementptr inbounds float, ptr addrspace(1) %data2, i32 1 + store float %call1, ptr addrspace(1) %arrayidx2, align 4 + ret void +} + +; Function Attrs: convergent nounwind +declare spir_func float @_Z16sub_group_rotatefi(float noundef, i32 noundef) #1 + +; Function Attrs: convergent nounwind +declare spir_func float @_Z26sub_group_clustered_rotatefij(float noundef, i32 noundef, i32 noundef) #1 + +; Function Attrs: convergent noinline norecurse nounwind optnone +define dso_local spir_kernel void @testRotateHalf(ptr addrspace(1) noundef align 2 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !15 !kernel_arg_base_type !15 !kernel_arg_type_qual !6 { +entry: + %dst.addr = alloca ptr addrspace(1), align 4 + %v = alloca half, align 2 + store ptr addrspace(1) %dst, ptr %dst.addr, align 4 + store half 0xH0000, ptr %v, align 2 + %value = load half, ptr %v, align 2 + ; CHECK: OpGroupNonUniformRotateKHR %[[TyHalf]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] + %call = call spir_func half @_Z16sub_group_rotateDhi(half noundef %value, i32 noundef 2) #2 + %data = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx = getelementptr inbounds half, ptr addrspace(1) %data, i32 0 + store half %call, ptr addrspace(1) %arrayidx, align 2 + %value_clustered = load half, ptr %v, align 2 + ; CHECK: OpGroupNonUniformRotateKHR %[[TyHalf]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] %[[ConstInt4]] + %call1 = call spir_func half @_Z26sub_group_clustered_rotateDhij(half noundef %value_clustered, i32 noundef 2, i32 noundef 4) #2 + %data2 = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx2 = getelementptr inbounds half, ptr addrspace(1) %data2, i32 1 + store half %call1, ptr addrspace(1) %arrayidx2, align 2 + ret void +} + +; Function Attrs: convergent nounwind +declare spir_func half @_Z16sub_group_rotateDhi(half noundef, i32 noundef) #1 + +; Function Attrs: convergent nounwind +declare spir_func half @_Z26sub_group_clustered_rotateDhij(half noundef, i32 noundef, i32 noundef) #1 + +; Function Attrs: convergent noinline norecurse nounwind optnone +define dso_local spir_kernel void @testRotateDouble(ptr addrspace(1) noundef align 8 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !6 { +entry: + %dst.addr = alloca ptr addrspace(1), align 4 + %v = alloca double, align 8 + store ptr addrspace(1) %dst, ptr %dst.addr, align 4 + store double 0.000000e+00, ptr %v, align 8 + %value = load double, ptr %v, align 8 + ; CHECK: OpGroupNonUniformRotateKHR %[[TyDouble]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] + %call = call spir_func double @_Z16sub_group_rotatedi(double noundef %value, i32 noundef 2) #2 + %data = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx = getelementptr inbounds double, ptr addrspace(1) %data, i32 0 + store double %call, ptr addrspace(1) %arrayidx, align 8 + %value_clustered = load double, ptr %v, align 8 + ; CHECK: OpGroupNonUniformRotateKHR %[[TyDouble]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] %[[ConstInt4]] + %call1 = call spir_func double @_Z26sub_group_clustered_rotatedij(double noundef %value_clustered, i32 noundef 2, i32 noundef 4) #2 + %data2 = load ptr addrspace(1), ptr %dst.addr, align 4 + %arrayidx2 = getelementptr inbounds double, ptr addrspace(1) %data2, i32 1 + store double %call1, ptr addrspace(1) %arrayidx2, align 8 + ret void +} + +; Function Attrs: convergent nounwind +declare spir_func double @_Z16sub_group_rotatedi(double noundef, i32 noundef) #1 + +; Function Attrs: convergent nounwind +declare spir_func double @_Z26sub_group_clustered_rotatedij(double noundef, i32 noundef, i32 noundef) #1 + +attributes #0 = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } +attributes #1 = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #2 = { convergent nounwind } + +!llvm.module.flags = !{!0} +!opencl.ocl.version = !{!1} +!opencl.spir.version = !{!1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 2, i32 0} +!2 = !{!"clang version 19.0.0"} +!3 = !{i32 1} +!4 = !{!"none"} +!5 = !{!"char*"} +!6 = !{!""} +!7 = !{!"uchar*"} +!8 = !{!"short*"} +!9 = !{!"ushort*"} +!10 = !{!"int*"} +!11 = !{!"uint*"} +!12 = !{!"long*"} +!13 = !{!"ulong*"} +!14 = !{!"float*"} +!15 = !{!"half*"} +!16 = !{!"double*"}