From 4722eee693370e75f97a464086e99782238b9ffe Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Thu, 26 Jun 2025 13:05:51 -0700 Subject: [PATCH 1/3] convert raw buffer stores and loads of doubles and i64s --- .../Target/DirectX/DXILIntrinsicExpansion.cpp | 194 ++++++++++----- .../test/CodeGen/DirectX/BufferStoreDouble.ll | 8 +- llvm/test/CodeGen/DirectX/BufferStoreInt64.ll | 8 +- .../CodeGen/DirectX/RawBufferLoad-error64.ll | 24 -- .../CodeGen/DirectX/RawBufferLoadDouble.ll | 199 +++++++++++++++ .../CodeGen/DirectX/RawBufferLoadInt64.ll | 232 ++++++++++++++++++ .../CodeGen/DirectX/RawBufferStore-error64.ll | 20 -- .../CodeGen/DirectX/RawBufferStoreDouble.ll | 94 +++++++ .../CodeGen/DirectX/RawBufferStoreInt64.ll | 93 +++++++ 9 files changed, 763 insertions(+), 109 deletions(-) delete mode 100644 llvm/test/CodeGen/DirectX/RawBufferLoad-error64.ll create mode 100644 llvm/test/CodeGen/DirectX/RawBufferLoadDouble.ll create mode 100644 llvm/test/CodeGen/DirectX/RawBufferLoadInt64.ll delete mode 100644 llvm/test/CodeGen/DirectX/RawBufferStore-error64.ll create mode 100644 llvm/test/CodeGen/DirectX/RawBufferStoreDouble.ll create mode 100644 llvm/test/CodeGen/DirectX/RawBufferStoreInt64.ll diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index 435b80ecaec64..0770f03572d5a 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -71,12 +71,23 @@ static bool isIntrinsicExpansion(Function &F) { case Intrinsic::vector_reduce_add: case Intrinsic::vector_reduce_fadd: return true; + case Intrinsic::dx_resource_load_rawbuffer: + if (F.getParent()->getTargetTriple().getDXILVersion() > VersionTuple(1, 2)) + return false; + // fallthrough to check if double or i64 + LLVM_FALLTHROUGH; case Intrinsic::dx_resource_load_typedbuffer: { // We need to handle i64, doubles, and vectors of them. Type *ScalarTy = F.getReturnType()->getStructElementType(0)->getScalarType(); return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64); } + case Intrinsic::dx_resource_store_rawbuffer: { + if (F.getParent()->getTargetTriple().getDXILVersion() > VersionTuple(1, 2)) + return false; + Type *ScalarTy = F.getFunctionType()->getParamType(3)->getScalarType(); + return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64); + } case Intrinsic::dx_resource_store_typedbuffer: { // We need to handle i64 and doubles and vectors of i64 and doubles. 
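    // For example (mirroring the storev2i64 test in BufferStoreInt64.ll below), a
    // call such as
    //   call void @llvm.dx.resource.store.typedbuffer(
    //       target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) %buffer, i32 0, <2 x i64> %0)
    // passes its value as parameter 2 and is flagged for expansion here; the
    // raw-buffer store above takes an additional offset operand, which is why that
    // case inspects parameter 3 instead.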
Type *ScalarTy = F.getFunctionType()->getParamType(2)->getScalarType(); @@ -544,7 +555,7 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) { return Builder.CreateFMul(X, PiOver180); } -static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) { +static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw) { IRBuilder<> Builder(Orig); Type *BufferTy = Orig->getType()->getStructElementType(0); @@ -552,55 +563,73 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) { bool IsDouble = ScalarTy->isDoubleTy(); assert(IsDouble || ScalarTy->isIntegerTy(64) && "Only expand double or int64 scalars or vectors"); + bool IsVector = isa(BufferTy); unsigned ExtractNum = 2; if (auto *VT = dyn_cast(BufferTy)) { - assert(VT->getNumElements() == 2 && - "TypedBufferLoad vector must be size 2"); - ExtractNum = 4; + if (!IsRaw) + assert(VT->getNumElements() == 2 && + "TypedBufferLoad vector must be size 2"); + ExtractNum = 2 * VT->getNumElements(); } - Type *Ty = VectorType::get(Builder.getInt32Ty(), ExtractNum, false); - - Type *LoadType = StructType::get(Ty, Builder.getInt1Ty()); - CallInst *Load = - Builder.CreateIntrinsic(LoadType, Intrinsic::dx_resource_load_typedbuffer, - {Orig->getOperand(0), Orig->getOperand(1)}); - - // extract the buffer load's result - Value *Extract = Builder.CreateExtractValue(Load, {0}); - - SmallVector ExtractElements; - for (unsigned I = 0; I < ExtractNum; ++I) - ExtractElements.push_back( - Builder.CreateExtractElement(Extract, Builder.getInt32(I))); - - // combine into double(s) or int64(s) + SmallVector Loads; Value *Result = PoisonValue::get(BufferTy); - for (unsigned I = 0; I < ExtractNum; I += 2) { - Value *Combined = nullptr; - if (IsDouble) - // For doubles, use dx_asdouble intrinsic - Combined = - Builder.CreateIntrinsic(Builder.getDoubleTy(), Intrinsic::dx_asdouble, - {ExtractElements[I], ExtractElements[I + 1]}); - else { - // For int64, manually combine two int32s - // First, zero-extend both values to i64 - Value *Lo = Builder.CreateZExt(ExtractElements[I], Builder.getInt64Ty()); - Value *Hi = - Builder.CreateZExt(ExtractElements[I + 1], Builder.getInt64Ty()); - // Shift the high bits left by 32 bits - Value *ShiftedHi = Builder.CreateShl(Hi, Builder.getInt64(32)); - // OR the high and low bits together - Combined = Builder.CreateOr(Lo, ShiftedHi); + unsigned Base = 0; + while (ExtractNum > 0) { + unsigned LoadNum = std::min(ExtractNum, 4u); + Type *Ty = VectorType::get(Builder.getInt32Ty(), LoadNum, false); + + Type *LoadType = StructType::get(Ty, Builder.getInt1Ty()); + Intrinsic::ID LoadIntrinsic = Intrinsic::dx_resource_load_typedbuffer; + SmallVector Args = {Orig->getOperand(0), Orig->getOperand(1)}; + if (IsRaw) { + LoadIntrinsic = Intrinsic::dx_resource_load_rawbuffer; + Value *Tmp = Builder.getInt32(4 * Base * 2); + Args.push_back(Builder.CreateAdd(Orig->getOperand(2), Tmp)); } - if (ExtractNum == 4) - Result = Builder.CreateInsertElement(Result, Combined, - Builder.getInt32(I / 2)); - else - Result = Combined; + CallInst *Load = Builder.CreateIntrinsic(LoadType, LoadIntrinsic, Args); + Loads.push_back(Load); + + // extract the buffer load's result + Value *Extract = Builder.CreateExtractValue(Load, {0}); + + SmallVector ExtractElements; + for (unsigned I = 0; I < LoadNum; ++I) + ExtractElements.push_back( + Builder.CreateExtractElement(Extract, Builder.getInt32(I))); + + // combine into double(s) or int64(s) + for (unsigned I = 0; I < LoadNum; I += 2) { + Value *Combined = nullptr; + if (IsDouble) + // For doubles, use dx_asdouble 
intrinsic + Combined = Builder.CreateIntrinsic( + Builder.getDoubleTy(), Intrinsic::dx_asdouble, + {ExtractElements[I], ExtractElements[I + 1]}); + else { + // For int64, manually combine two int32s + // First, zero-extend both values to i64 + Value *Lo = + Builder.CreateZExt(ExtractElements[I], Builder.getInt64Ty()); + Value *Hi = + Builder.CreateZExt(ExtractElements[I + 1], Builder.getInt64Ty()); + // Shift the high bits left by 32 bits + Value *ShiftedHi = Builder.CreateShl(Hi, Builder.getInt64(32)); + // OR the high and low bits together + Combined = Builder.CreateOr(Lo, ShiftedHi); + } + + if (IsVector) + Result = Builder.CreateInsertElement(Result, Combined, + Builder.getInt32((I / 2) + Base)); + else + Result = Combined; + } + + ExtractNum -= LoadNum; + Base += LoadNum / 2; } Value *CheckBit = nullptr; @@ -620,8 +649,12 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) { } else { // Use of the check bit assert(Indices[0] == 1 && "Unexpected type for typedbufferload"); - if (!CheckBit) - CheckBit = Builder.CreateExtractValue(Load, {1}); + if (!CheckBit) { + SmallVector CheckBits; + for (Value *L : Loads) + CheckBits.push_back(Builder.CreateExtractValue(L, {1})); + CheckBit = Builder.CreateAnd(CheckBits); + } EVI->replaceAllUsesWith(CheckBit); } EVI->eraseFromParent(); @@ -630,10 +663,10 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) { return true; } -static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) { +static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw) { IRBuilder<> Builder(Orig); - Type *BufferTy = Orig->getFunctionType()->getParamType(2); + Type *BufferTy = Orig->getFunctionType()->getParamType(IsRaw ? 3 : 2); Type *ScalarTy = BufferTy->getScalarType(); bool IsDouble = ScalarTy->isDoubleTy(); assert((IsDouble || ScalarTy->isIntegerTy(64)) && @@ -641,19 +674,24 @@ static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) { // Determine if we're dealing with a vector or scalar bool IsVector = isa(BufferTy); - if (IsVector) { - assert(cast(BufferTy)->getNumElements() == 2 && - "TypedBufferStore vector must be size 2"); + unsigned ExtractNum = 2; + unsigned VecLen = 0; + if (auto *VT = dyn_cast(BufferTy)) { + if (!IsRaw) + assert(VT->getNumElements() == 2 && + "TypedBufferStore vector must be size 2"); + VecLen = VT->getNumElements(); + ExtractNum = VecLen * 2; } // Create the appropriate vector type for the result Type *Int32Ty = Builder.getInt32Ty(); - Type *ResultTy = VectorType::get(Int32Ty, IsVector ? 4 : 2, false); + Type *ResultTy = VectorType::get(Int32Ty, ExtractNum, false); Value *Val = PoisonValue::get(ResultTy); Type *SplitElementTy = Int32Ty; if (IsVector) - SplitElementTy = VectorType::get(SplitElementTy, 2, false); + SplitElementTy = VectorType::get(SplitElementTy, VecLen, false); Value *LowBits = nullptr; Value *HighBits = nullptr; @@ -661,15 +699,16 @@ static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) { if (IsDouble) { auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy); Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble, - {Orig->getOperand(2)}); + {Orig->getOperand(IsRaw ? 3 : 2)}); LowBits = Builder.CreateExtractValue(Split, 0); HighBits = Builder.CreateExtractValue(Split, 1); } else { // Handle int64 type(s) - Value *InputVal = Orig->getOperand(2); + Value *InputVal = Orig->getOperand(IsRaw ? 
3 : 2); Constant *ShiftAmt = Builder.getInt64(32); if (IsVector) - ShiftAmt = ConstantVector::getSplat(ElementCount::getFixed(2), ShiftAmt); + ShiftAmt = + ConstantVector::getSplat(ElementCount::getFixed(VecLen), ShiftAmt); // Split into low and high 32-bit parts LowBits = Builder.CreateTrunc(InputVal, SplitElementTy); @@ -678,17 +717,42 @@ static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) { } if (IsVector) { - Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3}); + SmallVector Mask; + for (unsigned I = 0; I < VecLen; ++I) { + Mask.push_back(I); + Mask.push_back(I + VecLen); + } + Val = Builder.CreateShuffleVector(LowBits, HighBits, Mask); } else { Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0)); Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1)); } - // Create the final intrinsic call - Builder.CreateIntrinsic(Builder.getVoidTy(), - Intrinsic::dx_resource_store_typedbuffer, - {Orig->getOperand(0), Orig->getOperand(1), Val}); + unsigned Base = 0; + while (ExtractNum > 0) { + unsigned StoreNum = std::min(ExtractNum, 4u); + + Intrinsic::ID StoreIntrinsic = Intrinsic::dx_resource_store_typedbuffer; + SmallVector Args = {Orig->getOperand(0), Orig->getOperand(1)}; + if (IsRaw) { + StoreIntrinsic = Intrinsic::dx_resource_store_rawbuffer; + Value *Tmp = Builder.getInt32(4 * Base); + Args.push_back(Builder.CreateAdd(Orig->getOperand(2), Tmp)); + } + + SmallVector Mask; + for (unsigned I = 0; I < StoreNum; ++I) { + Mask.push_back(Base + I); + } + Value *SubVal = Builder.CreateShuffleVector(Val, Mask); + + Args.push_back(SubVal); + // Create the final intrinsic call + Builder.CreateIntrinsic(Builder.getVoidTy(), StoreIntrinsic, Args); + ExtractNum -= StoreNum; + Base += StoreNum; + } Orig->eraseFromParent(); return true; } @@ -821,12 +885,20 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::dx_radians: Result = expandRadiansIntrinsic(Orig); break; + case Intrinsic::dx_resource_load_rawbuffer: + if (expandBufferLoadIntrinsic(Orig, /*IsRaw*/ true)) + return true; + break; + case Intrinsic::dx_resource_store_rawbuffer: + if (expandBufferStoreIntrinsic(Orig, /*IsRaw*/ true)) + return true; + break; case Intrinsic::dx_resource_load_typedbuffer: - if (expandTypedBufferLoadIntrinsic(Orig)) + if (expandBufferLoadIntrinsic(Orig, /*IsRaw*/ false)) return true; break; case Intrinsic::dx_resource_store_typedbuffer: - if (expandTypedBufferStoreIntrinsic(Orig)) + if (expandBufferStoreIntrinsic(Orig, /*IsRaw*/ false)) return true; break; case Intrinsic::usub_sat: diff --git a/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll index 9c3dab0cc1e46..560bb56d34d45 100644 --- a/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll +++ b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll @@ -16,8 +16,10 @@ define void @storef64(double %0) { ; CHECK: [[Hi:%.*]] = extractvalue { i32, i32 } [[SD]], 1 ; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[Lo]], i32 0 ; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[Hi]], i32 1 + ; this shufflevector is unnecessary but generated to avoid specalization + ; CHECK: [[Vec3:%.*]] = shufflevector <2 x i32> [[Vec2]], <2 x i32> poison, <2 x i32> ; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_f64_1_0_0t.v2i32( - ; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0, <2 x i32> [[Vec2]]) + ; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0, <2 x i32> [[Vec3]]) call void 
@llvm.dx.resource.store.typedbuffer( target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0, double %0) @@ -38,8 +40,10 @@ define void @storev2f64(<2 x double> %0) { ; CHECK: [[Lo:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 0 ; CHECK: [[Hi:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 1 ; CHECK: [[Vec:%.*]] = shufflevector <2 x i32> [[Lo]], <2 x i32> [[Hi]], <4 x i32> + ; this shufflevector is unnecessary but generated to avoid specalization + ; CHECK: [[Vec2:%.*]] = shufflevector <4 x i32> [[Vec]], <4 x i32> poison, <4 x i32> ; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2f64_1_0_0t.v4i32( - ; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0, <4 x i32> [[Vec]]) + ; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0, <4 x i32> [[Vec2]]) call void @llvm.dx.resource.store.typedbuffer( target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0, <2 x double> %0) diff --git a/llvm/test/CodeGen/DirectX/BufferStoreInt64.ll b/llvm/test/CodeGen/DirectX/BufferStoreInt64.ll index c97a02d1873a0..31031804a0e8b 100644 --- a/llvm/test/CodeGen/DirectX/BufferStoreInt64.ll +++ b/llvm/test/CodeGen/DirectX/BufferStoreInt64.ll @@ -12,7 +12,9 @@ define void @storei64(i64 %0) { ; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP4]], i32 1 -; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_i64_1_0_0t.v2i32(target("dx.TypedBuffer", i64, 1, 0, 0) [[BUFFER]], i32 0, <2 x i32> [[TMP6]]) +; the shufflevector is unnecessary but generated to avoid too much specalization +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_i64_1_0_0t.v2i32(target("dx.TypedBuffer", i64, 1, 0, 0) [[BUFFER]], i32 0, <2 x i32> [[TMP7]]) ; CHECK-NEXT: ret void ; %buffer = tail call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) @@ -29,7 +31,9 @@ define void @storev2i64(<2 x i64> %0) { ; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP0]], splat (i64 32) ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP3]] to <2 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> -; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2i64_1_0_0t.v4i32(target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) [[BUFFER]], i32 0, <4 x i32> [[TMP13]]) +; the shufflevector is unnecessary but generated to avoid too much specalization +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2i64_1_0_0t.v4i32(target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) [[BUFFER]], i32 0, <4 x i32> [[TMP14]]) ; CHECK-NEXT: ret void ; %buffer = tail call target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) diff --git a/llvm/test/CodeGen/DirectX/RawBufferLoad-error64.ll b/llvm/test/CodeGen/DirectX/RawBufferLoad-error64.ll deleted file mode 100644 index d8b6311c8ff2e..0000000000000 --- a/llvm/test/CodeGen/DirectX/RawBufferLoad-error64.ll +++ /dev/null @@ -1,24 +0,0 @@ -; We use llc for this test so that we don't abort after 
the first error. -; RUN: not llc %s -o /dev/null 2>&1 | FileCheck %s - -target triple = "dxil-pc-shadermodel6.2-compute" - -declare void @v4f64_user(<4 x double>) - -; Can't load 64 bit types directly until SM6.3 (byteaddressbuf.Load) -; CHECK: error: -; CHECK-SAME: in function loadv4f64_byte -; CHECK-SAME: Cannot create RawBufferLoad operation: Invalid overload type -define void @loadv4f64_byte(i32 %offset) "hlsl.export" { - %buffer = call target("dx.RawBuffer", i8, 0, 0, 0) - @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0_0( - i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) - - %load = call {<4 x double>, i1} @llvm.dx.resource.load.rawbuffer.v4i64( - target("dx.RawBuffer", i8, 0, 0, 0) %buffer, i32 %offset, i32 0) - %data = extractvalue {<4 x double>, i1} %load, 0 - - call void @v4f64_user(<4 x double> %data) - - ret void -} diff --git a/llvm/test/CodeGen/DirectX/RawBufferLoadDouble.ll b/llvm/test/CodeGen/DirectX/RawBufferLoadDouble.ll new file mode 100644 index 0000000000000..dc0c19dad9e06 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/RawBufferLoadDouble.ll @@ -0,0 +1,199 @@ +; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s + +target triple = "dxil-pc-shadermodel6.2-compute" + +define void @loadf64(i32 %index) { + ; check the handle from binding is unchanged + ; CHECK: [[B:%.*]] = call target("dx.Rawbuffer", double, 0, 0) + ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_f64_0_0t( + ; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false, ptr null) + %buffer = call target("dx.Rawbuffer", double, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_f64_1_0_0t( + i32 0, i32 1, i32 1, i32 0, i1 false, ptr null) + + ; check we load an <2 x i32> instead of a double + ; CHECK-NOT: call {double, i1} @llvm.dx.resource.load.rawbuffer + ; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 } + ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v2i32.tdx.Rawbuffer_f64_0_0t( + ; CHECK-SAME: target("dx.Rawbuffer", double, 0, 0) [[B]], i32 %index, i32 0) + %load0 = call {double, i1} @llvm.dx.resource.load.rawbuffer( + target("dx.Rawbuffer", double, 0, 0) %buffer, i32 %index, i32 0) + + ; check we extract the two i32 and construct a double + ; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0 + ; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i32 0 + ; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i32 1 + ; CHECK: [[DBL:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]]) + ; CHECK-NOT: extractvalue { double, i1 } + %data0 = extractvalue {double, i1} %load0, 0 + ret void +} + +define void @loadv2f64(i32 %index) { + ; check the handle from binding is unchanged + ; CHECK: [[B:%.*]] = call target("dx.Rawbuffer", <2 x double>, 0, 0) + ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v2f64_0_0t( + ; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false, ptr null) + %buffer = call target("dx.Rawbuffer", <2 x double>, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v2f64_1_0_0t( + i32 0, i32 1, i32 1, i32 0, i1 false, ptr null) + + ; check we load an <4 x i32> instead of a double2 + ; CHECK: [[L0:%.*]] = call { <4 x i32>, i1 } + ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v2f64_0_0t( + ; CHECK-SAME: target("dx.Rawbuffer", <2 x double>, 0, 0) [[B]], i32 %index, i32 0) + %load0 = call { <2 x double>, i1 } @llvm.dx.resource.load.rawbuffer( + target("dx.Rawbuffer", <2 x double>, 0, 0) %buffer, i32 %index, i32 0) + + ; check we extract the 4 i32 and construct a <2 x double> + ; CHECK: [[D0:%.*]] = extractvalue { <4 
x i32>, i1 } [[L0]], 0 + ; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0 + ; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1 + ; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2 + ; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3 + ; CHECK: [[Dbl1:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo1]], i32 [[Hi1]]) + ; CHECK: [[Vec:%.*]] = insertelement <2 x double> poison, double [[Dbl1]], i32 0 + ; CHECK: [[Dbl2:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo2]], i32 [[Hi2]]) + ; CHECK: [[Vec2:%.*]] = insertelement <2 x double> [[Vec]], double [[Dbl2]], i32 1 + ; CHECK-NOT: extractvalue { <2 x double>, i1 } + %data0 = extractvalue { <2 x double>, i1 } %load0, 0 + ret void +} + +; show we properly handle extracting the check bit +define void @loadf64WithCheckBit(i32 %index) { + ; check the handle from binding is unchanged + ; CHECK: [[B:%.*]] = call target("dx.Rawbuffer", double, 0, 0) + ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_f64_0_0t( + ; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false, ptr null) + %buffer = call target("dx.Rawbuffer", double, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_f64_1_0_0t( + i32 0, i32 1, i32 1, i32 0, i1 false, ptr null) + + ; check we load an <2 x i32> instead of a double + ; CHECK-NOT: call {double, i1} @llvm.dx.resource.load.rawbuffer + ; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 } + ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v2i32.tdx.Rawbuffer_f64_0_0t( + ; CHECK-SAME: target("dx.Rawbuffer", double, 0, 0) [[B]], i32 %index, i32 0) + %load0 = call {double, i1} @llvm.dx.resource.load.rawbuffer( + target("dx.Rawbuffer", double, 0, 0) %buffer, i32 %index, i32 0) + + ; check we extract the two i32 and construct a double + ; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0 + ; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i32 0 + ; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i32 1 + ; CHECK: [[DBL:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]]) + %data0 = extractvalue {double, i1} %load0, 0 + ; CHECK: extractvalue { <2 x i32>, i1 } [[L0]], 1 + ; CHECK-NOT: extractvalue { double, i1 } + %cb = extractvalue {double, i1} %load0, 1 + ret void +} + +; Raw Buffer Load allows for double3 and double4 to be loaded +; In SM6.2 and below, two loads will be performed. 
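+; For the <3 x double> case below the expansion is roughly (see the CHECK lines
+; that follow; value names here are illustrative):
+;   %p0 = call { <4 x i32>, i1 } @llvm.dx.resource.load.rawbuffer(..., i32 %index, i32 0)
+;   %p1 = call { <2 x i32>, i1 } @llvm.dx.resource.load.rawbuffer(..., i32 %index, i32 16)
+; with each (lo, hi) pair of i32s recombined via @llvm.dx.asdouble.i32 and the
+; two status bits and'ed together.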
+; Show we and the checkbits together + +define void @loadv3f64(i32 %index) { + ; check the handle from binding is unchanged + ; CHECK: [[B:%.*]] = call target("dx.Rawbuffer", <3 x double>, 0, 0) + ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v3f64_0_0t( + ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + %buffer = call target("dx.Rawbuffer", <3 x double>, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v3f64_0_0t( + i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + + ; check we perform two loads + ; and do 6 extracts and construct 3 doubles + ; CHECK-NOT: call {<3 x double>, i1} @llvm.dx.resource.load.rawbuffer + ; CHECK: [[L0:%.*]] = call { <4 x i32>, i1 } + ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v3f64_0_0t( + ; CHECK-SAME: target("dx.Rawbuffer", <3 x double>, 0, 0) [[B]], i32 %index, i32 0) + + ; CHECK: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0 + ; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0 + ; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1 + ; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2 + ; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3 + ; CHECK: [[DBL1:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo1]], i32 [[Hi1]]) + ; CHECK: [[Vec1:%.*]] = insertelement <3 x double> poison, double [[DBL1]], i32 0 + ; CHECK: [[DBL2:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo2]], i32 [[Hi2]]) + ; CHECK: [[Vec2:%.*]] = insertelement <3 x double> [[Vec1]], double [[DBL2]], i32 1 + + ; 2nd load + ; CHECK: [[L2:%.*]] = call { <2 x i32>, i1 } + ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v2i32.tdx.Rawbuffer_v3f64_0_0t( + ; CHECK-SAME: target("dx.Rawbuffer", <3 x double>, 0, 0) [[B]], i32 %index, i32 16) + + ; CHECK: [[D2:%.*]] = extractvalue { <2 x i32>, i1 } [[L2]], 0 + ; CHECK: [[Lo3:%.*]] = extractelement <2 x i32> [[D2]], i32 0 + ; CHECK: [[Hi3:%.*]] = extractelement <2 x i32> [[D2]], i32 1 + ; CHECK: [[DBL3:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo3]], i32 [[Hi3]]) + ; CHECK: [[Vec3:%.*]] = insertelement <3 x double> [[Vec2]], double [[DBL3]], i32 2 + %load0 = call {<3 x double>, i1} @llvm.dx.resource.load.rawbuffer( + target("dx.Rawbuffer", <3 x double>, 0, 0) %buffer, i32 %index, i32 0) + + + ; CHECK-NOT: extractvalue {<3 x double>, i1 } + %data0 = extractvalue {<3 x double>, i1} %load0, 0 + ; check we extract checkbit from both loads and and them together + ; CHECK: [[B1:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 1 + ; CHECK: [[B2:%.*]] = extractvalue { <2 x i32>, i1 } [[L2]], 1 + ; CHECK: and i1 [[B1]], [[B2]] + %cb = extractvalue {<3 x double>, i1} %load0, 1 + ret void +} + +define void @loadv4f64(i32 %index) { + ; check the handle from binding is unchanged + ; CHECK: [[B:%.*]] = call target("dx.Rawbuffer", <4 x double>, 0, 0) + ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v4f64_0_0t( + ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + %buffer = call target("dx.Rawbuffer", <4 x double>, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v4f64_0_0t( + i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + + ; check we perform two loads + ; and do 8 extracts and construct 4 doubles + ; CHECK-NOT: call {<4 x double>, i1} @llvm.dx.resource.load.rawbuffer + ; CHECK: [[L0:%.*]] = call { <4 x i32>, i1 } + ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v4f64_0_0t( + ; CHECK-SAME: target("dx.Rawbuffer", <4 x double>, 0, 0) [[B]], i32 %index, i32 0) + + ; CHECK: [[D0:%.*]] = 
extractvalue { <4 x i32>, i1 } [[L0]], 0 + ; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0 + ; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1 + ; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2 + ; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3 + ; CHECK: [[DBL1:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo1]], i32 [[Hi1]]) + ; CHECK: [[Vec1:%.*]] = insertelement <4 x double> poison, double [[DBL1]], i32 0 + ; CHECK: [[DBL2:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo2]], i32 [[Hi2]]) + ; CHECK: [[Vec2:%.*]] = insertelement <4 x double> [[Vec1]], double [[DBL2]], i32 1 + + ; 2nd load + ; CHECK: [[L2:%.*]] = call { <4 x i32>, i1 } + ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v4f64_0_0t( + ; CHECK-SAME: target("dx.Rawbuffer", <4 x double>, 0, 0) [[B]], i32 %index, i32 16) + + ; CHECK: [[D2:%.*]] = extractvalue { <4 x i32>, i1 } [[L2]], 0 + ; CHECK: [[Lo3:%.*]] = extractelement <4 x i32> [[D2]], i32 0 + ; CHECK: [[Hi3:%.*]] = extractelement <4 x i32> [[D2]], i32 1 + ; CHECK: [[Lo4:%.*]] = extractelement <4 x i32> [[D2]], i32 2 + ; CHECK: [[Hi4:%.*]] = extractelement <4 x i32> [[D2]], i32 3 + ; CHECK: [[DBL3:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo3]], i32 [[Hi3]]) + ; CHECK: [[Vec3:%.*]] = insertelement <4 x double> [[Vec2]], double [[DBL3]], i32 2 + ; CHECK: [[DBL4:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo4]], i32 [[Hi4]]) + ; CHECK: [[Vec4:%.*]] = insertelement <4 x double> [[Vec3]], double [[DBL4]], i32 3 + %load0 = call {<4 x double>, i1} @llvm.dx.resource.load.rawbuffer( + target("dx.Rawbuffer", <4 x double>, 0, 0) %buffer, i32 %index, i32 0) + + + ; CHECK-NOT: extractvalue {<4 x double>, i1 } + %data0 = extractvalue {<4 x double>, i1} %load0, 0 + ; check we extract checkbit from both loads and and them together + ; CHECK: [[B1:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 1 + ; CHECK: [[B2:%.*]] = extractvalue { <4 x i32>, i1 } [[L2]], 1 + ; CHECK: and i1 [[B1]], [[B2]] + %cb = extractvalue {<4 x double>, i1} %load0, 1 + ret void +} diff --git a/llvm/test/CodeGen/DirectX/RawBufferLoadInt64.ll b/llvm/test/CodeGen/DirectX/RawBufferLoadInt64.ll new file mode 100644 index 0000000000000..2ed939fd48226 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/RawBufferLoadInt64.ll @@ -0,0 +1,232 @@ +; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s + +target triple = "dxil-pc-shadermodel6.2-compute" + +define void @loadi64(i32 %index) { + ; check the handle from binding is unchanged + ; CHECK: [[B:%.*]] = call target("dx.Rawbuffer", i64, 1, 0, 0) + ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_i64_1_0_0t( + ; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false, ptr null) + %buffer = call target("dx.Rawbuffer", i64, 1, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_i64_1_0_0t( + i32 0, i32 1, i32 1, i32 0, i1 false, ptr null) + + ; check we load an <2 x i32> instead of a i64 + ; CHECK-NOT: call {i64, i1} @llvm.dx.resource.load.rawbuffer + ; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 } + ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v2i32.tdx.Rawbuffer_i64_1_0_0t( + ; CHECK-SAME: target("dx.Rawbuffer", i64, 1, 0, 0) [[B]], i32 %index, i32 0) + %load0 = call {i64, i1} @llvm.dx.resource.load.rawbuffer( + target("dx.Rawbuffer", i64, 1, 0, 0) %buffer, i32 %index, i32 0) + + ; check we extract the two i32 and construct a i64 + ; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0 + ; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i32 0 + ; CHECK: [[Hi:%.*]] = 
extractelement <2 x i32> [[D0]], i32 1 + ; CHECK: [[ZLo1:%.*]] = zext i32 [[Lo]] to i64 + ; CHECK: [[ZHi1:%.*]] = zext i32 [[Hi]] to i64 + ; CHECK: [[A:%.*]] = shl i64 [[ZHi1]], 32 + ; CHECK: [[B:%.*]] = or i64 [[ZLo1]], [[A]] + ; CHECK-NOT: extractvalue { i64, i1 } + %data0 = extractvalue {i64, i1} %load0, 0 + ret void +} + +define void @loadv2i64(i32 %index) { + ; check the handle from binding is unchanged + ; CHECK: [[B:%.*]] = call target("dx.Rawbuffer", <2 x i64>, 1, 0, 0) + ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v2i64_1_0_0t( + ; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false, ptr null) + %buffer = call target("dx.Rawbuffer", <2 x i64>, 1, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v2i64_1_0_0t( + i32 0, i32 1, i32 1, i32 0, i1 false, ptr null) + + ; check we load an <4 x i32> instead of a i642 + ; CHECK: [[L0:%.*]] = call { <4 x i32>, i1 } + ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v2i64_1_0_0t( + ; CHECK-SAME: target("dx.Rawbuffer", <2 x i64>, 1, 0, 0) [[B]], i32 %index, i32 0) + %load0 = call { <2 x i64>, i1 } @llvm.dx.resource.load.rawbuffer( + target("dx.Rawbuffer", <2 x i64>, 1, 0, 0) %buffer, i32 %index, i32 0) + + ; check we extract the 4 i32 and construct a <2 x i64> + ; CHECK: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0 + ; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0 + ; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1 + ; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2 + ; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3 + ; CHECK: [[ZLo1:%.*]] = zext i32 [[Lo1]] to i64 + ; CHECK: [[ZHi1:%.*]] = zext i32 [[Hi1]] to i64 + ; CHECK: [[A:%.*]] = shl i64 [[ZHi1]], 32 + ; CHECK: [[B:%.*]] = or i64 [[ZLo1]], [[A]] + ; CHECK: [[Vec:%.*]] = insertelement <2 x i64> poison, i64 [[B]], i32 0 + ; CHECK: [[ZLo2:%.*]] = zext i32 [[Lo2]] to i64 + ; CHECK: [[ZHi2:%.*]] = zext i32 [[Hi2]] to i64 + ; CHECK: [[A:%.*]] = shl i64 [[ZHi2]], 32 + ; CHECK: [[B:%.*]] = or i64 [[ZLo2]], [[A]] + ; CHECK: [[Vec2:%.*]] = insertelement <2 x i64> [[Vec]], i64 [[B]], i32 1 + ; CHECK-NOT: extractvalue { <2 x i64>, i1 } + %data0 = extractvalue { <2 x i64>, i1 } %load0, 0 + ret void +} + +; show we properly handle extracting the check bit +define void @loadi64WithCheckBit(i32 %index) { + ; check the handle from binding is unchanged + ; CHECK: [[B:%.*]] = call target("dx.Rawbuffer", i64, 1, 0, 0) + ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_i64_1_0_0t( + ; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false, ptr null) + %buffer = call target("dx.Rawbuffer", i64, 1, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_i64_1_0_0t( + i32 0, i32 1, i32 1, i32 0, i1 false, ptr null) + + ; check we load an <2 x i32> instead of a i64 + ; CHECK-NOT: call {i64, i1} @llvm.dx.resource.load.rawbuffer + ; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 } + ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v2i32.tdx.Rawbuffer_i64_1_0_0t( + ; CHECK-SAME: target("dx.Rawbuffer", i64, 1, 0, 0) [[B]], i32 %index, i32 0) + %load0 = call {i64, i1} @llvm.dx.resource.load.rawbuffer( + target("dx.Rawbuffer", i64, 1, 0, 0) %buffer, i32 %index, i32 0) + + ; check we extract the two i32 and construct a i64 + ; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0 + ; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i32 0 + ; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i32 1 + ; CHECK: [[ZLo1:%.*]] = zext i32 [[Lo]] to i64 + ; CHECK: [[ZHi1:%.*]] = zext i32 [[Hi]] to i64 + ; 
CHECK: [[A:%.*]] = shl i64 [[ZHi1]], 32 + ; CHECK: [[B:%.*]] = or i64 [[ZLo1]], [[A]] + %data0 = extractvalue {i64, i1} %load0, 0 + ; CHECK: extractvalue { <2 x i32>, i1 } [[L0]], 1 + ; CHECK-NOT: extractvalue { i64, i1 } + %cb = extractvalue {i64, i1} %load0, 1 + ret void +} + +; Raw Buffer Load allows for i64_t3 and i64_t4 to be loaded +; In SM6.2 and below, two loads will be performed. +; Show we and the checkbits together + +define void @loadv3i64(i32 %index) { + ; check the handle from binding is unchanged + ; CHECK: [[Buf:%.*]] = call target("dx.Rawbuffer", <3 x i64>, 0, 0) + ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v3i64_0_0t( + ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + %buffer = call target("dx.Rawbuffer", <3 x i64>, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v3i64_0_0t( + i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + + ; check we perform two loads + ; and do 6 extracts and construct 3 i64s + ; CHECK-NOT: call {<3 x i64>, i1} @llvm.dx.resource.load.rawbuffer + ; CHECK: [[L0:%.*]] = call { <4 x i32>, i1 } + ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v3i64_0_0t( + ; CHECK-SAME: target("dx.Rawbuffer", <3 x i64>, 0, 0) [[Buf]], i32 %index, i32 0) + + ; CHECK: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0 + ; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0 + ; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1 + ; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2 + ; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3 + ; CHECK: [[ZLo1:%.*]] = zext i32 [[Lo1]] to i64 + ; CHECK: [[ZHi1:%.*]] = zext i32 [[Hi1]] to i64 + ; CHECK: [[A:%.*]] = shl i64 [[ZHi1]], 32 + ; CHECK: [[B:%.*]] = or i64 [[ZLo1]], [[A]] + ; CHECK: [[Vec1:%.*]] = insertelement <3 x i64> poison, i64 [[B]], i32 0 + ; CHECK: [[ZLo2:%.*]] = zext i32 [[Lo2]] to i64 + ; CHECK: [[ZHi2:%.*]] = zext i32 [[Hi2]] to i64 + ; CHECK: [[A:%.*]] = shl i64 [[ZHi2]], 32 + ; CHECK: [[B:%.*]] = or i64 [[ZLo2]], [[A]] + ; CHECK: [[Vec2:%.*]] = insertelement <3 x i64> [[Vec1]], i64 [[B]], i32 1 + + ; 2nd load + ; CHECK: [[L2:%.*]] = call { <2 x i32>, i1 } + ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v2i32.tdx.Rawbuffer_v3i64_0_0t( + ; CHECK-SAME: target("dx.Rawbuffer", <3 x i64>, 0, 0) [[Buf]], i32 %index, i32 16) + + ; CHECK: [[D2:%.*]] = extractvalue { <2 x i32>, i1 } [[L2]], 0 + ; CHECK: [[Lo3:%.*]] = extractelement <2 x i32> [[D2]], i32 0 + ; CHECK: [[Hi3:%.*]] = extractelement <2 x i32> [[D2]], i32 1 + ; CHECK: [[ZLo3:%.*]] = zext i32 [[Lo3]] to i64 + ; CHECK: [[ZHi3:%.*]] = zext i32 [[Hi3]] to i64 + ; CHECK: [[A:%.*]] = shl i64 [[ZHi3]], 32 + ; CHECK: [[B:%.*]] = or i64 [[ZLo3]], [[A]] + ; CHECK: [[Vec3:%.*]] = insertelement <3 x i64> [[Vec2]], i64 [[B]], i32 2 + %load0 = call {<3 x i64>, i1} @llvm.dx.resource.load.rawbuffer( + target("dx.Rawbuffer", <3 x i64>, 0, 0) %buffer, i32 %index, i32 0) + + + ; CHECK-NOT: extractvalue {<3 x i64>, i1 } + %data0 = extractvalue {<3 x i64>, i1} %load0, 0 + ; check we extract checkbit from both loads and and them together + ; CHECK: [[B1:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 1 + ; CHECK: [[B2:%.*]] = extractvalue { <2 x i32>, i1 } [[L2]], 1 + ; CHECK: and i1 [[B1]], [[B2]] + %cb = extractvalue {<3 x i64>, i1} %load0, 1 + ret void +} + +define void @loadv4i64(i32 %index) { + ; check the handle from binding is unchanged + ; CHECK: [[Buf:%.*]] = call target("dx.Rawbuffer", <4 x i64>, 0, 0) + ; CHECK-SAME: 
@llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v4i64_0_0t( + ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + %buffer = call target("dx.Rawbuffer", <4 x i64>, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v4i64_0_0t( + i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + + ; check we perform two loads + ; and do 8 extracts and construct 4 i64s + ; CHECK-NOT: call {<4 x i64>, i1} @llvm.dx.resource.load.rawbuffer + ; CHECK: [[L0:%.*]] = call { <4 x i32>, i1 } + ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v4i64_0_0t( + ; CHECK-SAME: target("dx.Rawbuffer", <4 x i64>, 0, 0) [[Buf]], i32 %index, i32 0) + + ; CHECK: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0 + ; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0 + ; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1 + ; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2 + ; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3 + ; CHECK: [[ZLo1:%.*]] = zext i32 [[Lo1]] to i64 + ; CHECK: [[ZHi1:%.*]] = zext i32 [[Hi1]] to i64 + ; CHECK: [[A:%.*]] = shl i64 [[ZHi1]], 32 + ; CHECK: [[B:%.*]] = or i64 [[ZLo1]], [[A]] + ; CHECK: [[Vec1:%.*]] = insertelement <4 x i64> poison, i64 [[B]], i32 0 + ; CHECK: [[ZLo2:%.*]] = zext i32 [[Lo2]] to i64 + ; CHECK: [[ZHi2:%.*]] = zext i32 [[Hi2]] to i64 + ; CHECK: [[A:%.*]] = shl i64 [[ZHi2]], 32 + ; CHECK: [[B:%.*]] = or i64 [[ZLo2]], [[A]] + ; CHECK: [[Vec2:%.*]] = insertelement <4 x i64> [[Vec1]], i64 [[B]], i32 1 + + ; 2nd load + ; CHECK: [[L2:%.*]] = call { <4 x i32>, i1 } + ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v4i64_0_0t( + ; CHECK-SAME: target("dx.Rawbuffer", <4 x i64>, 0, 0) [[Buf]], i32 %index, i32 16) + + ; CHECK: [[D2:%.*]] = extractvalue { <4 x i32>, i1 } [[L2]], 0 + ; CHECK: [[Lo3:%.*]] = extractelement <4 x i32> [[D2]], i32 0 + ; CHECK: [[Hi3:%.*]] = extractelement <4 x i32> [[D2]], i32 1 + ; CHECK: [[Lo4:%.*]] = extractelement <4 x i32> [[D2]], i32 2 + ; CHECK: [[Hi4:%.*]] = extractelement <4 x i32> [[D2]], i32 3 + ; CHECK: [[ZLo3:%.*]] = zext i32 [[Lo3]] to i64 + ; CHECK: [[ZHi3:%.*]] = zext i32 [[Hi3]] to i64 + ; CHECK: [[A:%.*]] = shl i64 [[ZHi3]], 32 + ; CHECK: [[B:%.*]] = or i64 [[ZLo3]], [[A]] + ; CHECK: [[Vec3:%.*]] = insertelement <4 x i64> [[Vec2]], i64 [[B]], i32 2 + ; CHECK: [[ZLo4:%.*]] = zext i32 [[Lo4]] to i64 + ; CHECK: [[ZHi4:%.*]] = zext i32 [[Hi4]] to i64 + ; CHECK: [[A:%.*]] = shl i64 [[ZHi4]], 32 + ; CHECK: [[B:%.*]] = or i64 [[ZLo4]], [[A]] + ; CHECK: [[Vec4:%.*]] = insertelement <4 x i64> [[Vec3]], i64 [[B]], i32 3 + %load0 = call {<4 x i64>, i1} @llvm.dx.resource.load.rawbuffer( + target("dx.Rawbuffer", <4 x i64>, 0, 0) %buffer, i32 %index, i32 0) + + + ; CHECK-NOT: extractvalue {<4 x i64>, i1 } + %data0 = extractvalue {<4 x i64>, i1} %load0, 0 + ; check we extract checkbit from both loads and and them together + ; CHECK: [[B1:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 1 + ; CHECK: [[B2:%.*]] = extractvalue { <4 x i32>, i1 } [[L2]], 1 + ; CHECK: and i1 [[B1]], [[B2]] + %cb = extractvalue {<4 x i64>, i1} %load0, 1 + ret void +} diff --git a/llvm/test/CodeGen/DirectX/RawBufferStore-error64.ll b/llvm/test/CodeGen/DirectX/RawBufferStore-error64.ll deleted file mode 100644 index 895c4c3b9f5fe..0000000000000 --- a/llvm/test/CodeGen/DirectX/RawBufferStore-error64.ll +++ /dev/null @@ -1,20 +0,0 @@ -; We use llc for this test so that we don't abort after the first error. 
-; RUN: not llc %s -o /dev/null 2>&1 | FileCheck %s - -target triple = "dxil-pc-shadermodel6.2-compute" - -; Can't store 64 bit types directly until SM6.3 (byteaddressbuf.Store) -; CHECK: error: -; CHECK-SAME: in function storev4f64_byte -; CHECK-SAME: Cannot create RawBufferStore operation: Invalid overload type -define void @storev4f64_byte(i32 %offset, <4 x double> %data) "hlsl.export" { - %buffer = call target("dx.RawBuffer", i8, 1, 0, 0) - @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0_0( - i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) - - call void @llvm.dx.resource.store.rawbuffer.v4i64( - target("dx.RawBuffer", i8, 1, 0, 0) %buffer, - i32 %offset, i32 0, <4 x double> %data) - - ret void -} diff --git a/llvm/test/CodeGen/DirectX/RawBufferStoreDouble.ll b/llvm/test/CodeGen/DirectX/RawBufferStoreDouble.ll new file mode 100644 index 0000000000000..1bf8b6b52a049 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/RawBufferStoreDouble.ll @@ -0,0 +1,94 @@ +; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s + +target triple = "dxil-pc-shadermodel6.2-compute" + +define void @storef64(double %0, i32 %index) { + ; CHECK: [[B:%.*]] = tail call target("dx.RawBuffer", double, 1, 0) + ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_f64_1_0t( + ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + %buffer = tail call target("dx.RawBuffer", double, 1, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_f64_1_0t( + i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + + ; check we split the double and store the lo and hi bits + ; CHECK: [[SD:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double %0) + ; CHECK: [[Lo:%.*]] = extractvalue { i32, i32 } [[SD]], 0 + ; CHECK: [[Hi:%.*]] = extractvalue { i32, i32 } [[SD]], 1 + ; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[Lo]], i32 0 + ; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[Hi]], i32 1 + ; this shufflevector is unnecessary but generated to avoid specalization + ; CHECK: [[Vec3:%.*]] = shufflevector <2 x i32> [[Vec2]], <2 x i32> poison, <2 x i32> + ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f64_1_0t.v2i32( + ; CHECK-SAME: target("dx.RawBuffer", double, 1, 0) [[B]], i32 %index, i32 0, <2 x i32> [[Vec3]]) + call void @llvm.dx.resource.store.rawbuffer( + target("dx.RawBuffer", double, 1, 0) %buffer, i32 %index, i32 0, + double %0) + ret void +} + +define void @storev2f64(<2 x double> %0, i32 %index) { + ; CHECK: [[B:%.*]] = tail call target("dx.RawBuffer", <2 x double>, 1, 0) + ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v2f64_1_0t( + ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + %buffer = tail call target("dx.RawBuffer", <2 x double>, 1, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v2f64_1_0t( + i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + + ; CHECK: [[SD:%.*]] = call { <2 x i32>, <2 x i32> } + ; CHECK-SAME: @llvm.dx.splitdouble.v2i32(<2 x double> %0) + ; CHECK: [[Lo:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 0 + ; CHECK: [[Hi:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 1 + ; CHECK: [[Vec:%.*]] = shufflevector <2 x i32> [[Lo]], <2 x i32> [[Hi]], <4 x i32> + ; this shufflevector is unnecessary but generated to avoid specalization + ; CHECK: [[Vec2:%.*]] = shufflevector <4 x i32> [[Vec]], <4 x i32> poison, <4 x i32> + ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v2f64_1_0t.v4i32( + ; CHECK-SAME: target("dx.RawBuffer", <2 x double>, 1, 0) [[B]], i32 %index, i32 0, 
<4 x i32> [[Vec2]]) + call void @llvm.dx.resource.store.rawbuffer( + target("dx.RawBuffer", <2 x double>, 1, 0) %buffer, i32 %index, i32 0, + <2 x double> %0) + ret void +} + +define void @storev3f64(<3 x double> %0, i32 %index) { + ; CHECK: [[Buf:%.*]] = tail call target("dx.RawBuffer", <3 x double>, 1, 0) + ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v3f64_1_0t( + ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + %buffer = tail call target("dx.RawBuffer", <3 x double>, 1, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v3f64_1_0t( + i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + + ; CHECK: [[A:%.*]] = call { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double> %0) + ; CHECK: [[B:%.*]] = extractvalue { <3 x i32>, <3 x i32> } [[A]], 0 + ; CHECK: [[C:%.*]] = extractvalue { <3 x i32>, <3 x i32> } [[A]], 1 + ; CHECK: [[D:%.*]] = shufflevector <3 x i32> [[B]], <3 x i32> [[C]], <6 x i32> + ; CHECK: [[E:%.*]] = shufflevector <6 x i32> [[D]], <6 x i32> poison, <4 x i32> + ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v3f64_1_0t.v4i32(target("dx.RawBuffer", <3 x double>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i32> [[E]]) + ; CHECK: [[F:%.*]] = shufflevector <6 x i32> [[D]], <6 x i32> poison, <2 x i32> + ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v3f64_1_0t.v2i32(target("dx.RawBuffer", <3 x double>, 1, 0) [[Buf]], i32 %index, i32 16, <2 x i32> [[F]]) + call void @llvm.dx.resource.store.rawbuffer( + target("dx.RawBuffer", <3 x double>, 1, 0) %buffer, i32 %index, i32 0, + <3 x double> %0) + ret void +} + +define void @storev4f64(<4 x double> %0, i32 %index) { + ; CHECK: [[Buf:%.*]] = tail call target("dx.RawBuffer", <4 x double>, 1, 0) + ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v4f64_1_0t( + ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + %buffer = tail call target("dx.RawBuffer", <4 x double>, 1, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v4f64_1_0t( + i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + + ; CHECK: [[A:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.dx.splitdouble.v4i32(<4 x double> %0) + ; CHECK: [[B:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[A]], 0 + ; CHECK: [[C:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[A]], 1 + ; CHECK: [[D:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[C]], <8 x i32> + ; CHECK: [[E:%.*]] = shufflevector <8 x i32> [[D]], <8 x i32> poison, <4 x i32> + ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v4f64_1_0t.v4i32(target("dx.RawBuffer", <4 x double>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i32> [[E]]) + ; CHECK: [[F:%.*]] = shufflevector <8 x i32> [[D]], <8 x i32> poison, <4 x i32> + ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v4f64_1_0t.v4i32(target("dx.RawBuffer", <4 x double>, 1, 0) [[Buf]], i32 %index, i32 16, <4 x i32> [[F]]) + call void @llvm.dx.resource.store.rawbuffer( + target("dx.RawBuffer", <4 x double>, 1, 0) %buffer, i32 %index, i32 0, + <4 x double> %0) + ret void +} diff --git a/llvm/test/CodeGen/DirectX/RawBufferStoreInt64.ll b/llvm/test/CodeGen/DirectX/RawBufferStoreInt64.ll new file mode 100644 index 0000000000000..76fc5f011c5ca --- /dev/null +++ b/llvm/test/CodeGen/DirectX/RawBufferStoreInt64.ll @@ -0,0 +1,93 @@ +; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s + +target triple = "dxil-pc-shadermodel6.2-compute" + +define void @storei64(i64 %0, i32 %index) { + ; CHECK: [[Buf:%.*]] = tail call target("dx.RawBuffer", i64, 1, 0) + ; 
CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i64_1_0t( + ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + %buffer = tail call target("dx.RawBuffer", i64, 1, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i64_1_0t( + i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + + ; check we split the i64 and store the lo and hi bits + ; CHECK: [[A:%.*]] = trunc i64 %0 to i32 + ; CHECK: [[B:%.*]] = lshr i64 %0, 32 + ; CHECK: [[C:%.*]] = trunc i64 [[B]] to i32 + ; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i32 0 + ; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[C]], i32 1 + ; this shufflevector is unnecessary but generated to avoid specalization + ; CHECK: [[Vec3:%.*]] = shufflevector <2 x i32> [[Vec2]], <2 x i32> poison, <2 x i32> + ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i64_1_0t.v2i32( + ; CHECK-SAME: target("dx.RawBuffer", i64, 1, 0) [[Buf]], i32 %index, i32 0, <2 x i32> [[Vec3]]) + call void @llvm.dx.resource.store.rawbuffer( + target("dx.RawBuffer", i64, 1, 0) %buffer, i32 %index, i32 0, + i64 %0) + ret void +} + +define void @storev2i64(<2 x i64> %0, i32 %index) { + ; CHECK: [[Buf:%.*]] = tail call target("dx.RawBuffer", <2 x i64>, 1, 0) + ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v2i64_1_0t( + ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + %buffer = tail call target("dx.RawBuffer", <2 x i64>, 1, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v2i64_1_0t( + i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + + ; CHECK: [[A:%.*]] = trunc <2 x i64> %0 to <2 x i32> + ; CHECK: [[B:%.*]] = lshr <2 x i64> %0, splat (i64 32) + ; CHECK: [[C:%.*]] = trunc <2 x i64> [[B]] to <2 x i32> + ; CHECK: [[Vec:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[C]], <4 x i32> + ; this shufflevector is unnecessary but generated to avoid specalization + ; CHECK: [[Vec2:%.*]] = shufflevector <4 x i32> [[Vec]], <4 x i32> poison, <4 x i32> + ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v2i64_1_0t.v4i32( + ; CHECK-SAME: target("dx.RawBuffer", <2 x i64>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i32> [[Vec2]]) + call void @llvm.dx.resource.store.rawbuffer( + target("dx.RawBuffer", <2 x i64>, 1, 0) %buffer, i32 %index, i32 0, + <2 x i64> %0) + ret void +} + +define void @storev3i64(<3 x i64> %0, i32 %index) { + ; CHECK: [[Buf:%.*]] = tail call target("dx.RawBuffer", <3 x i64>, 1, 0) + ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v3i64_1_0t( + ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + %buffer = tail call target("dx.RawBuffer", <3 x i64>, 1, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v3i64_1_0t( + i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + + ; CHECK: [[A:%.*]] = trunc <3 x i64> %0 to <3 x i32> + ; CHECK: [[B:%.*]] = lshr <3 x i64> %0, splat (i64 32) + ; CHECK: [[C:%.*]] = trunc <3 x i64> [[B]] to <3 x i32> + ; CHECK: [[D:%.*]] = shufflevector <3 x i32> [[A]], <3 x i32> [[C]], <6 x i32> + ; CHECK: [[E:%.*]] = shufflevector <6 x i32> [[D]], <6 x i32> poison, <4 x i32> + ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v3i64_1_0t.v4i32(target("dx.RawBuffer", <3 x i64>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i32> [[E]]) + ; CHECK: [[F:%.*]] = shufflevector <6 x i32> [[D]], <6 x i32> poison, <2 x i32> + ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v3i64_1_0t.v2i32(target("dx.RawBuffer", <3 x i64>, 1, 0) [[Buf]], i32 %index, i32 16, <2 x i32> [[F]]) + call void 
@llvm.dx.resource.store.rawbuffer( + target("dx.RawBuffer", <3 x i64>, 1, 0) %buffer, i32 %index, i32 0, + <3 x i64> %0) + ret void +} + +define void @storev4i64(<4 x i64> %0, i32 %index) { + ; CHECK: [[Buf:%.*]] = tail call target("dx.RawBuffer", <4 x i64>, 1, 0) + ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v4i64_1_0t( + ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + %buffer = tail call target("dx.RawBuffer", <4 x i64>, 1, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v4i64_1_0t( + i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + + ; CHECK: [[A:%.*]] = trunc <4 x i64> %0 to <4 x i32> + ; CHECK: [[B:%.*]] = lshr <4 x i64> %0, splat (i64 32) + ; CHECK: [[C:%.*]] = trunc <4 x i64> [[B]] to <4 x i32> + ; CHECK: [[D:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[C]], <8 x i32> + ; CHECK: [[E:%.*]] = shufflevector <8 x i32> [[D]], <8 x i32> poison, <4 x i32> + ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v4i64_1_0t.v4i32(target("dx.RawBuffer", <4 x i64>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i32> [[E]]) + ; CHECK: [[F:%.*]] = shufflevector <8 x i32> [[D]], <8 x i32> poison, <4 x i32> + ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v4i64_1_0t.v4i32(target("dx.RawBuffer", <4 x i64>, 1, 0) [[Buf]], i32 %index, i32 16, <4 x i32> [[F]]) + call void @llvm.dx.resource.store.rawbuffer( + target("dx.RawBuffer", <4 x i64>, 1, 0) %buffer, i32 %index, i32 0, + <4 x i64> %0) + ret void +} From 5e2f7a5be6686bb6f8c72be955b8b1ab71ae7e09 Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Thu, 3 Jul 2025 08:55:47 -0700 Subject: [PATCH 2/3] respond to pr comments --- .../Target/DirectX/DXILIntrinsicExpansion.cpp | 78 +++++++++++-------- .../test/CodeGen/DirectX/BufferStoreDouble.ll | 8 +- llvm/test/CodeGen/DirectX/BufferStoreInt64.ll | 8 +- .../CodeGen/DirectX/RawBufferStoreDouble.ll | 8 +- .../CodeGen/DirectX/RawBufferStoreInt64.ll | 8 +- 5 files changed, 52 insertions(+), 58 deletions(-) diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index 0770f03572d5a..ee1db54446cb8 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -42,6 +42,15 @@ class DXILIntrinsicExpansionLegacy : public ModulePass { static char ID; // Pass identification. }; +static bool resourceAccessNeeds64BitExpansion(Module *M, Type *OverloadTy, + bool IsRaw) { + if (IsRaw && M->getTargetTriple().getDXILVersion() > VersionTuple(1, 2)) + return false; + + Type *ScalarTy = OverloadTy->getScalarType(); + return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64); +} + static bool isIntrinsicExpansion(Function &F) { switch (F.getIntrinsicID()) { case Intrinsic::abs: @@ -72,27 +81,19 @@ static bool isIntrinsicExpansion(Function &F) { case Intrinsic::vector_reduce_fadd: return true; case Intrinsic::dx_resource_load_rawbuffer: - if (F.getParent()->getTargetTriple().getDXILVersion() > VersionTuple(1, 2)) - return false; - // fallthrough to check if double or i64 - LLVM_FALLTHROUGH; - case Intrinsic::dx_resource_load_typedbuffer: { - // We need to handle i64, doubles, and vectors of them. 
- Type *ScalarTy = - F.getReturnType()->getStructElementType(0)->getScalarType(); - return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64); - } - case Intrinsic::dx_resource_store_rawbuffer: { - if (F.getParent()->getTargetTriple().getDXILVersion() > VersionTuple(1, 2)) - return false; - Type *ScalarTy = F.getFunctionType()->getParamType(3)->getScalarType(); - return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64); - } - case Intrinsic::dx_resource_store_typedbuffer: { - // We need to handle i64 and doubles and vectors of i64 and doubles. - Type *ScalarTy = F.getFunctionType()->getParamType(2)->getScalarType(); - return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64); - } + return resourceAccessNeeds64BitExpansion( + F.getParent(), F.getReturnType()->getStructElementType(0), + /*IsRaw*/ true); + case Intrinsic::dx_resource_load_typedbuffer: + return resourceAccessNeeds64BitExpansion( + F.getParent(), F.getReturnType()->getStructElementType(0), + /*IsRaw*/ false); + case Intrinsic::dx_resource_store_rawbuffer: + return resourceAccessNeeds64BitExpansion( + F.getParent(), F.getFunctionType()->getParamType(3), /*IsRaw*/ true); + case Intrinsic::dx_resource_store_typedbuffer: + return resourceAccessNeeds64BitExpansion( + F.getParent(), F.getFunctionType()->getParamType(2), /*IsRaw*/ false); } return false; } @@ -563,19 +564,20 @@ static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw) { bool IsDouble = ScalarTy->isDoubleTy(); assert(IsDouble || ScalarTy->isIntegerTy(64) && "Only expand double or int64 scalars or vectors"); - bool IsVector = isa(BufferTy); - + bool IsVector = false; unsigned ExtractNum = 2; if (auto *VT = dyn_cast(BufferTy)) { - if (!IsRaw) - assert(VT->getNumElements() == 2 && - "TypedBufferLoad vector must be size 2"); ExtractNum = 2 * VT->getNumElements(); + IsVector = true; + assert(IsRaw || ExtractNum == 4 && "TypedBufferLoad vector must be size 2"); } SmallVector Loads; Value *Result = PoisonValue::get(BufferTy); unsigned Base = 0; + // If we need to extract more than 4 i32; we need to break it up into + // more than one load. LoadNum tells us how many i32s we are loading in + // each load while (ExtractNum > 0) { unsigned LoadNum = std::min(ExtractNum, 4u); Type *Ty = VectorType::get(Builder.getInt32Ty(), LoadNum, false); @@ -649,6 +651,8 @@ static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw) { } else { // Use of the check bit assert(Indices[0] == 1 && "Unexpected type for typedbufferload"); + // Note: This does not always match the historical behaviour of DXC. + // See https://github.com/microsoft/DirectXShaderCompiler/issues/7622 if (!CheckBit) { SmallVector CheckBits; for (Value *L : Loads) @@ -666,22 +670,22 @@ static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw) { static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw) { IRBuilder<> Builder(Orig); - Type *BufferTy = Orig->getFunctionType()->getParamType(IsRaw ? 3 : 2); + unsigned ValIndex = IsRaw ? 
3 : 2; + Type *BufferTy = Orig->getFunctionType()->getParamType(ValIndex); Type *ScalarTy = BufferTy->getScalarType(); bool IsDouble = ScalarTy->isDoubleTy(); assert((IsDouble || ScalarTy->isIntegerTy(64)) && "Only expand double or int64 scalars or vectors"); // Determine if we're dealing with a vector or scalar - bool IsVector = isa(BufferTy); + bool IsVector = false; unsigned ExtractNum = 2; unsigned VecLen = 0; if (auto *VT = dyn_cast(BufferTy)) { - if (!IsRaw) - assert(VT->getNumElements() == 2 && - "TypedBufferStore vector must be size 2"); VecLen = VT->getNumElements(); + assert(IsRaw || VecLen == 2 && "TypedBufferStore vector must be size 2"); ExtractNum = VecLen * 2; + IsVector = true; } // Create the appropriate vector type for the result @@ -699,12 +703,12 @@ static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw) { if (IsDouble) { auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy); Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble, - {Orig->getOperand(IsRaw ? 3 : 2)}); + {Orig->getOperand(ValIndex)}); LowBits = Builder.CreateExtractValue(Split, 0); HighBits = Builder.CreateExtractValue(Split, 1); } else { // Handle int64 type(s) - Value *InputVal = Orig->getOperand(IsRaw ? 3 : 2); + Value *InputVal = Orig->getOperand(ValIndex); Constant *ShiftAmt = Builder.getInt64(32); if (IsVector) ShiftAmt = @@ -728,6 +732,9 @@ static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw) { Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1)); } + // If we need to extract more than 4 i32; we need to break it up into + // more than one store. StoreNum tells us how many i32s we are storing in + // each store unsigned Base = 0; while (ExtractNum > 0) { unsigned StoreNum = std::min(ExtractNum, 4u); @@ -744,7 +751,10 @@ static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw) { for (unsigned I = 0; I < StoreNum; ++I) { Mask.push_back(Base + I); } - Value *SubVal = Builder.CreateShuffleVector(Val, Mask); + + Value *SubVal = Val; + if (VecLen > 2) + SubVal = Builder.CreateShuffleVector(Val, Mask); Args.push_back(SubVal); // Create the final intrinsic call diff --git a/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll index 560bb56d34d45..9c3dab0cc1e46 100644 --- a/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll +++ b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll @@ -16,10 +16,8 @@ define void @storef64(double %0) { ; CHECK: [[Hi:%.*]] = extractvalue { i32, i32 } [[SD]], 1 ; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[Lo]], i32 0 ; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[Hi]], i32 1 - ; this shufflevector is unnecessary but generated to avoid specalization - ; CHECK: [[Vec3:%.*]] = shufflevector <2 x i32> [[Vec2]], <2 x i32> poison, <2 x i32> ; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_f64_1_0_0t.v2i32( - ; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0, <2 x i32> [[Vec3]]) + ; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0, <2 x i32> [[Vec2]]) call void @llvm.dx.resource.store.typedbuffer( target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0, double %0) @@ -40,10 +38,8 @@ define void @storev2f64(<2 x double> %0) { ; CHECK: [[Lo:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 0 ; CHECK: [[Hi:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 1 ; CHECK: [[Vec:%.*]] = shufflevector <2 x i32> [[Lo]], <2 x i32> [[Hi]], <4 x i32> - ; this shufflevector is 
unnecessary but generated to avoid specalization - ; CHECK: [[Vec2:%.*]] = shufflevector <4 x i32> [[Vec]], <4 x i32> poison, <4 x i32> ; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2f64_1_0_0t.v4i32( - ; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0, <4 x i32> [[Vec2]]) + ; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0, <4 x i32> [[Vec]]) call void @llvm.dx.resource.store.typedbuffer( target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0, <2 x double> %0) diff --git a/llvm/test/CodeGen/DirectX/BufferStoreInt64.ll b/llvm/test/CodeGen/DirectX/BufferStoreInt64.ll index 31031804a0e8b..c97a02d1873a0 100644 --- a/llvm/test/CodeGen/DirectX/BufferStoreInt64.ll +++ b/llvm/test/CodeGen/DirectX/BufferStoreInt64.ll @@ -12,9 +12,7 @@ define void @storei64(i64 %0) { ; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP4]], i32 1 -; the shufflevector is unnecessary but generated to avoid too much specalization -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_i64_1_0_0t.v2i32(target("dx.TypedBuffer", i64, 1, 0, 0) [[BUFFER]], i32 0, <2 x i32> [[TMP7]]) +; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_i64_1_0_0t.v2i32(target("dx.TypedBuffer", i64, 1, 0, 0) [[BUFFER]], i32 0, <2 x i32> [[TMP6]]) ; CHECK-NEXT: ret void ; %buffer = tail call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) @@ -31,9 +29,7 @@ define void @storev2i64(<2 x i64> %0) { ; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP0]], splat (i64 32) ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP3]] to <2 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> -; the shufflevector is unnecessary but generated to avoid too much specalization -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2i64_1_0_0t.v4i32(target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) [[BUFFER]], i32 0, <4 x i32> [[TMP14]]) +; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2i64_1_0_0t.v4i32(target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) [[BUFFER]], i32 0, <4 x i32> [[TMP13]]) ; CHECK-NEXT: ret void ; %buffer = tail call target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) diff --git a/llvm/test/CodeGen/DirectX/RawBufferStoreDouble.ll b/llvm/test/CodeGen/DirectX/RawBufferStoreDouble.ll index 1bf8b6b52a049..7af0117842c03 100644 --- a/llvm/test/CodeGen/DirectX/RawBufferStoreDouble.ll +++ b/llvm/test/CodeGen/DirectX/RawBufferStoreDouble.ll @@ -16,10 +16,8 @@ define void @storef64(double %0, i32 %index) { ; CHECK: [[Hi:%.*]] = extractvalue { i32, i32 } [[SD]], 1 ; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[Lo]], i32 0 ; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[Hi]], i32 1 - ; this shufflevector is unnecessary but generated to avoid specalization - ; CHECK: [[Vec3:%.*]] = shufflevector <2 x i32> [[Vec2]], <2 x i32> poison, <2 x i32> ; CHECK: call void 
@llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f64_1_0t.v2i32( - ; CHECK-SAME: target("dx.RawBuffer", double, 1, 0) [[B]], i32 %index, i32 0, <2 x i32> [[Vec3]]) + ; CHECK-SAME: target("dx.RawBuffer", double, 1, 0) [[B]], i32 %index, i32 0, <2 x i32> [[Vec2]]) call void @llvm.dx.resource.store.rawbuffer( target("dx.RawBuffer", double, 1, 0) %buffer, i32 %index, i32 0, double %0) @@ -39,10 +37,8 @@ define void @storev2f64(<2 x double> %0, i32 %index) { ; CHECK: [[Lo:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 0 ; CHECK: [[Hi:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 1 ; CHECK: [[Vec:%.*]] = shufflevector <2 x i32> [[Lo]], <2 x i32> [[Hi]], <4 x i32> - ; this shufflevector is unnecessary but generated to avoid specalization - ; CHECK: [[Vec2:%.*]] = shufflevector <4 x i32> [[Vec]], <4 x i32> poison, <4 x i32> ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v2f64_1_0t.v4i32( - ; CHECK-SAME: target("dx.RawBuffer", <2 x double>, 1, 0) [[B]], i32 %index, i32 0, <4 x i32> [[Vec2]]) + ; CHECK-SAME: target("dx.RawBuffer", <2 x double>, 1, 0) [[B]], i32 %index, i32 0, <4 x i32> [[Vec]]) call void @llvm.dx.resource.store.rawbuffer( target("dx.RawBuffer", <2 x double>, 1, 0) %buffer, i32 %index, i32 0, <2 x double> %0) diff --git a/llvm/test/CodeGen/DirectX/RawBufferStoreInt64.ll b/llvm/test/CodeGen/DirectX/RawBufferStoreInt64.ll index 76fc5f011c5ca..0cff8aaf8ac33 100644 --- a/llvm/test/CodeGen/DirectX/RawBufferStoreInt64.ll +++ b/llvm/test/CodeGen/DirectX/RawBufferStoreInt64.ll @@ -16,10 +16,8 @@ define void @storei64(i64 %0, i32 %index) { ; CHECK: [[C:%.*]] = trunc i64 [[B]] to i32 ; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i32 0 ; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[C]], i32 1 - ; this shufflevector is unnecessary but generated to avoid specalization - ; CHECK: [[Vec3:%.*]] = shufflevector <2 x i32> [[Vec2]], <2 x i32> poison, <2 x i32> ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i64_1_0t.v2i32( - ; CHECK-SAME: target("dx.RawBuffer", i64, 1, 0) [[Buf]], i32 %index, i32 0, <2 x i32> [[Vec3]]) + ; CHECK-SAME: target("dx.RawBuffer", i64, 1, 0) [[Buf]], i32 %index, i32 0, <2 x i32> [[Vec2]]) call void @llvm.dx.resource.store.rawbuffer( target("dx.RawBuffer", i64, 1, 0) %buffer, i32 %index, i32 0, i64 %0) @@ -38,10 +36,8 @@ define void @storev2i64(<2 x i64> %0, i32 %index) { ; CHECK: [[B:%.*]] = lshr <2 x i64> %0, splat (i64 32) ; CHECK: [[C:%.*]] = trunc <2 x i64> [[B]] to <2 x i32> ; CHECK: [[Vec:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[C]], <4 x i32> - ; this shufflevector is unnecessary but generated to avoid specalization - ; CHECK: [[Vec2:%.*]] = shufflevector <4 x i32> [[Vec]], <4 x i32> poison, <4 x i32> ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v2i64_1_0t.v4i32( - ; CHECK-SAME: target("dx.RawBuffer", <2 x i64>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i32> [[Vec2]]) + ; CHECK-SAME: target("dx.RawBuffer", <2 x i64>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i32> [[Vec]]) call void @llvm.dx.resource.store.rawbuffer( target("dx.RawBuffer", <2 x i64>, 1, 0) %buffer, i32 %index, i32 0, <2 x i64> %0) From a71e7d72f2c3a4be32919e61e73e9d29482c2d96 Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Mon, 7 Jul 2025 14:53:48 -0700 Subject: [PATCH 3/3] update tests based on feedback --- .../CodeGen/DirectX/RawBufferLoadDouble.ll | 216 +++++++------ .../CodeGen/DirectX/RawBufferLoadInt64.ll | 288 ++++++++++-------- .../CodeGen/DirectX/RawBufferStoreDouble.ll | 81 
+++-- .../CodeGen/DirectX/RawBufferStoreInt64.ll | 78 +++-- 4 files changed, 376 insertions(+), 287 deletions(-) diff --git a/llvm/test/CodeGen/DirectX/RawBufferLoadDouble.ll b/llvm/test/CodeGen/DirectX/RawBufferLoadDouble.ll index dc0c19dad9e06..9213d60c9b496 100644 --- a/llvm/test/CodeGen/DirectX/RawBufferLoadDouble.ll +++ b/llvm/test/CodeGen/DirectX/RawBufferLoadDouble.ll @@ -1,6 +1,5 @@ -; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s - -target triple = "dxil-pc-shadermodel6.2-compute" +; RUN: opt -mtriple=dxil-pc-shadermodel6.2-compute -S -dxil-intrinsic-expansion %s | FileCheck %s --check-prefixes=CHECK,CHECK62 +; RUN: opt -mtriple=dxil-pc-shadermodel6.3-compute -S -dxil-intrinsic-expansion %s | FileCheck %s --check-prefixes=CHECK,CHECK63 define void @loadf64(i32 %index) { ; check the handle from binding is unchanged @@ -11,20 +10,26 @@ define void @loadf64(i32 %index) { @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_f64_1_0_0t( i32 0, i32 1, i32 1, i32 0, i1 false, ptr null) + ; check we don't modify the code in sm6.3 or later + ; CHECK63: [[L0:%.*]] = call { double, i1 } @llvm.dx.resource.load.rawbuffer + ; CHECK63-SAME: target("dx.Rawbuffer", double, 0, 0) [[B]], i32 %index, i32 0) + ; check we load an <2 x i32> instead of a double - ; CHECK-NOT: call {double, i1} @llvm.dx.resource.load.rawbuffer - ; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 } - ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v2i32.tdx.Rawbuffer_f64_0_0t( - ; CHECK-SAME: target("dx.Rawbuffer", double, 0, 0) [[B]], i32 %index, i32 0) + ; CHECK62-NOT: call {double, i1} @llvm.dx.resource.load.rawbuffer + ; CHECK62: [[L0:%.*]] = call { <2 x i32>, i1 } + ; CHECK62-SAME: @llvm.dx.resource.load.rawbuffer.v2i32.tdx.Rawbuffer_f64_0_0t( + ; CHECK62-SAME: target("dx.Rawbuffer", double, 0, 0) [[B]], i32 %index, i32 0) %load0 = call {double, i1} @llvm.dx.resource.load.rawbuffer( target("dx.Rawbuffer", double, 0, 0) %buffer, i32 %index, i32 0) + ; CHECK63: extractvalue { double, i1 } [[L0]], 0 + ; check we extract the two i32 and construct a double - ; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0 - ; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i32 0 - ; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i32 1 - ; CHECK: [[DBL:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]]) - ; CHECK-NOT: extractvalue { double, i1 } + ; CHECK62: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0 + ; CHECK62: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i32 0 + ; CHECK62: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i32 1 + ; CHECK62: [[DBL:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]]) + ; CHECK62-NOT: extractvalue { double, i1 } %data0 = extractvalue {double, i1} %load0, 0 ret void } @@ -38,24 +43,30 @@ define void @loadv2f64(i32 %index) { @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v2f64_1_0_0t( i32 0, i32 1, i32 1, i32 0, i1 false, ptr null) + ; check we don't modify the code in sm6.3 or later + ; CHECK63: [[L0:%.*]] = call { <2 x double>, i1 } @llvm.dx.resource.load.rawbuffer + ; CHECK63-SAME: target("dx.Rawbuffer", <2 x double>, 0, 0) [[B]], i32 %index, i32 0) + ; check we load an <4 x i32> instead of a double2 - ; CHECK: [[L0:%.*]] = call { <4 x i32>, i1 } - ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v2f64_0_0t( - ; CHECK-SAME: target("dx.Rawbuffer", <2 x double>, 0, 0) [[B]], i32 %index, i32 0) + ; CHECK62: [[L0:%.*]] = call { <4 x i32>, i1 } + ; CHECK62-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v2f64_0_0t( + ; 
CHECK62-SAME: target("dx.Rawbuffer", <2 x double>, 0, 0) [[B]], i32 %index, i32 0) %load0 = call { <2 x double>, i1 } @llvm.dx.resource.load.rawbuffer( target("dx.Rawbuffer", <2 x double>, 0, 0) %buffer, i32 %index, i32 0) + ; CHECK63: extractvalue { <2 x double>, i1 } [[L0]], 0 + ; check we extract the 4 i32 and construct a <2 x double> - ; CHECK: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0 - ; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0 - ; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1 - ; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2 - ; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3 - ; CHECK: [[Dbl1:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo1]], i32 [[Hi1]]) - ; CHECK: [[Vec:%.*]] = insertelement <2 x double> poison, double [[Dbl1]], i32 0 - ; CHECK: [[Dbl2:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo2]], i32 [[Hi2]]) - ; CHECK: [[Vec2:%.*]] = insertelement <2 x double> [[Vec]], double [[Dbl2]], i32 1 - ; CHECK-NOT: extractvalue { <2 x double>, i1 } + ; CHECK62: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0 + ; CHECK62: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0 + ; CHECK62: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1 + ; CHECK62: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2 + ; CHECK62: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3 + ; CHECK62: [[Dbl1:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo1]], i32 [[Hi1]]) + ; CHECK62: [[Vec:%.*]] = insertelement <2 x double> poison, double [[Dbl1]], i32 0 + ; CHECK62: [[Dbl2:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo2]], i32 [[Hi2]]) + ; CHECK62: [[Vec2:%.*]] = insertelement <2 x double> [[Vec]], double [[Dbl2]], i32 1 + ; CHECK62-NOT: extractvalue { <2 x double>, i1 } %data0 = extractvalue { <2 x double>, i1 } %load0, 0 ret void } @@ -70,22 +81,29 @@ define void @loadf64WithCheckBit(i32 %index) { @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_f64_1_0_0t( i32 0, i32 1, i32 1, i32 0, i1 false, ptr null) + ; check we don't modify the code in sm6.3 or later + ; CHECK63: [[L0:%.*]] = call { double, i1 } @llvm.dx.resource.load.rawbuffer + ; CHECK63-SAME: target("dx.Rawbuffer", double, 0, 0) [[B]], i32 %index, i32 0) + ; check we load an <2 x i32> instead of a double - ; CHECK-NOT: call {double, i1} @llvm.dx.resource.load.rawbuffer - ; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 } - ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v2i32.tdx.Rawbuffer_f64_0_0t( - ; CHECK-SAME: target("dx.Rawbuffer", double, 0, 0) [[B]], i32 %index, i32 0) + ; CHECK62-NOT: call {double, i1} @llvm.dx.resource.load.rawbuffer + ; CHECK62: [[L0:%.*]] = call { <2 x i32>, i1 } + ; CHECK62-SAME: @llvm.dx.resource.load.rawbuffer.v2i32.tdx.Rawbuffer_f64_0_0t( + ; CHECK62-SAME: target("dx.Rawbuffer", double, 0, 0) [[B]], i32 %index, i32 0) %load0 = call {double, i1} @llvm.dx.resource.load.rawbuffer( target("dx.Rawbuffer", double, 0, 0) %buffer, i32 %index, i32 0) + ; CHECK63: extractvalue { double, i1 } [[L0]], 0 + ; CHECK63: extractvalue { double, i1 } [[L0]], 1 + ; check we extract the two i32 and construct a double - ; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0 - ; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i32 0 - ; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i32 1 - ; CHECK: [[DBL:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]]) + ; CHECK62: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0 + ; CHECK62: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i32 0 + ; CHECK62: [[Hi:%.*]] = 
extractelement <2 x i32> [[D0]], i32 1 + ; CHECK62: [[DBL:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]]) %data0 = extractvalue {double, i1} %load0, 0 - ; CHECK: extractvalue { <2 x i32>, i1 } [[L0]], 1 - ; CHECK-NOT: extractvalue { double, i1 } + ; CHECK62: extractvalue { <2 x i32>, i1 } [[L0]], 1 + ; CHECK62-NOT: extractvalue { double, i1 } %cb = extractvalue {double, i1} %load0, 1 ret void } @@ -103,43 +121,49 @@ define void @loadv3f64(i32 %index) { @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v3f64_0_0t( i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + ; check we don't modify the code in sm6.3 or later + ; CHECK63: [[L0:%.*]] = call { <3 x double>, i1 } @llvm.dx.resource.load.rawbuffer + ; CHECK63-SAME: target("dx.Rawbuffer", <3 x double>, 0, 0) [[B]], i32 %index, i32 0) + ; check we perform two loads ; and do 6 extracts and construct 3 doubles - ; CHECK-NOT: call {<3 x double>, i1} @llvm.dx.resource.load.rawbuffer - ; CHECK: [[L0:%.*]] = call { <4 x i32>, i1 } - ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v3f64_0_0t( - ; CHECK-SAME: target("dx.Rawbuffer", <3 x double>, 0, 0) [[B]], i32 %index, i32 0) - - ; CHECK: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0 - ; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0 - ; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1 - ; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2 - ; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3 - ; CHECK: [[DBL1:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo1]], i32 [[Hi1]]) - ; CHECK: [[Vec1:%.*]] = insertelement <3 x double> poison, double [[DBL1]], i32 0 - ; CHECK: [[DBL2:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo2]], i32 [[Hi2]]) - ; CHECK: [[Vec2:%.*]] = insertelement <3 x double> [[Vec1]], double [[DBL2]], i32 1 + ; CHECK62-NOT: call {<3 x double>, i1} @llvm.dx.resource.load.rawbuffer + ; CHECK62: [[L0:%.*]] = call { <4 x i32>, i1 } + ; CHECK62-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v3f64_0_0t( + ; CHECK62-SAME: target("dx.Rawbuffer", <3 x double>, 0, 0) [[B]], i32 %index, i32 0) + + ; CHECK62: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0 + ; CHECK62: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0 + ; CHECK62: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1 + ; CHECK62: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2 + ; CHECK62: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3 + ; CHECK62: [[DBL1:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo1]], i32 [[Hi1]]) + ; CHECK62: [[Vec1:%.*]] = insertelement <3 x double> poison, double [[DBL1]], i32 0 + ; CHECK62: [[DBL2:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo2]], i32 [[Hi2]]) + ; CHECK62: [[Vec2:%.*]] = insertelement <3 x double> [[Vec1]], double [[DBL2]], i32 1 ; 2nd load - ; CHECK: [[L2:%.*]] = call { <2 x i32>, i1 } - ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v2i32.tdx.Rawbuffer_v3f64_0_0t( - ; CHECK-SAME: target("dx.Rawbuffer", <3 x double>, 0, 0) [[B]], i32 %index, i32 16) - - ; CHECK: [[D2:%.*]] = extractvalue { <2 x i32>, i1 } [[L2]], 0 - ; CHECK: [[Lo3:%.*]] = extractelement <2 x i32> [[D2]], i32 0 - ; CHECK: [[Hi3:%.*]] = extractelement <2 x i32> [[D2]], i32 1 - ; CHECK: [[DBL3:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo3]], i32 [[Hi3]]) - ; CHECK: [[Vec3:%.*]] = insertelement <3 x double> [[Vec2]], double [[DBL3]], i32 2 + ; CHECK62: [[L2:%.*]] = call { <2 x i32>, i1 } + ; CHECK62-SAME: @llvm.dx.resource.load.rawbuffer.v2i32.tdx.Rawbuffer_v3f64_0_0t( + ; CHECK62-SAME: 
target("dx.Rawbuffer", <3 x double>, 0, 0) [[B]], i32 %index, i32 16) + + ; CHECK62: [[D2:%.*]] = extractvalue { <2 x i32>, i1 } [[L2]], 0 + ; CHECK62: [[Lo3:%.*]] = extractelement <2 x i32> [[D2]], i32 0 + ; CHECK62: [[Hi3:%.*]] = extractelement <2 x i32> [[D2]], i32 1 + ; CHECK62: [[DBL3:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo3]], i32 [[Hi3]]) + ; CHECK62: [[Vec3:%.*]] = insertelement <3 x double> [[Vec2]], double [[DBL3]], i32 2 %load0 = call {<3 x double>, i1} @llvm.dx.resource.load.rawbuffer( target("dx.Rawbuffer", <3 x double>, 0, 0) %buffer, i32 %index, i32 0) + ; CHECK63: extractvalue { <3 x double>, i1 } [[L0]], 0 + ; CHECK63: extractvalue { <3 x double>, i1 } [[L0]], 1 - ; CHECK-NOT: extractvalue {<3 x double>, i1 } + ; CHECK62-NOT: extractvalue {<3 x double>, i1 } %data0 = extractvalue {<3 x double>, i1} %load0, 0 ; check we extract checkbit from both loads and and them together - ; CHECK: [[B1:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 1 - ; CHECK: [[B2:%.*]] = extractvalue { <2 x i32>, i1 } [[L2]], 1 - ; CHECK: and i1 [[B1]], [[B2]] + ; CHECK62: [[B1:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 1 + ; CHECK62: [[B2:%.*]] = extractvalue { <2 x i32>, i1 } [[L2]], 1 + ; CHECK62: and i1 [[B1]], [[B2]] %cb = extractvalue {<3 x double>, i1} %load0, 1 ret void } @@ -153,47 +177,53 @@ define void @loadv4f64(i32 %index) { @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v4f64_0_0t( i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + ; check we don't modify the code in sm6.3 or later + ; CHECK63: [[L0:%.*]] = call { <4 x double>, i1 } @llvm.dx.resource.load.rawbuffer + ; CHECK63-SAME: target("dx.Rawbuffer", <4 x double>, 0, 0) [[B]], i32 %index, i32 0) + ; check we perform two loads ; and do 8 extracts and construct 4 doubles - ; CHECK-NOT: call {<4 x double>, i1} @llvm.dx.resource.load.rawbuffer - ; CHECK: [[L0:%.*]] = call { <4 x i32>, i1 } - ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v4f64_0_0t( - ; CHECK-SAME: target("dx.Rawbuffer", <4 x double>, 0, 0) [[B]], i32 %index, i32 0) - - ; CHECK: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0 - ; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0 - ; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1 - ; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2 - ; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3 - ; CHECK: [[DBL1:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo1]], i32 [[Hi1]]) - ; CHECK: [[Vec1:%.*]] = insertelement <4 x double> poison, double [[DBL1]], i32 0 - ; CHECK: [[DBL2:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo2]], i32 [[Hi2]]) - ; CHECK: [[Vec2:%.*]] = insertelement <4 x double> [[Vec1]], double [[DBL2]], i32 1 + ; CHECK62-NOT: call {<4 x double>, i1} @llvm.dx.resource.load.rawbuffer + ; CHECK62: [[L0:%.*]] = call { <4 x i32>, i1 } + ; CHECK62-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v4f64_0_0t( + ; CHECK62-SAME: target("dx.Rawbuffer", <4 x double>, 0, 0) [[B]], i32 %index, i32 0) + + ; CHECK62: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0 + ; CHECK62: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0 + ; CHECK62: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1 + ; CHECK62: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2 + ; CHECK62: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3 + ; CHECK62: [[DBL1:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo1]], i32 [[Hi1]]) + ; CHECK62: [[Vec1:%.*]] = insertelement <4 x double> poison, double [[DBL1]], i32 0 + ; CHECK62: [[DBL2:%.*]] = 
call double @llvm.dx.asdouble.i32(i32 [[Lo2]], i32 [[Hi2]]) + ; CHECK62: [[Vec2:%.*]] = insertelement <4 x double> [[Vec1]], double [[DBL2]], i32 1 ; 2nd load - ; CHECK: [[L2:%.*]] = call { <4 x i32>, i1 } - ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v4f64_0_0t( - ; CHECK-SAME: target("dx.Rawbuffer", <4 x double>, 0, 0) [[B]], i32 %index, i32 16) - - ; CHECK: [[D2:%.*]] = extractvalue { <4 x i32>, i1 } [[L2]], 0 - ; CHECK: [[Lo3:%.*]] = extractelement <4 x i32> [[D2]], i32 0 - ; CHECK: [[Hi3:%.*]] = extractelement <4 x i32> [[D2]], i32 1 - ; CHECK: [[Lo4:%.*]] = extractelement <4 x i32> [[D2]], i32 2 - ; CHECK: [[Hi4:%.*]] = extractelement <4 x i32> [[D2]], i32 3 - ; CHECK: [[DBL3:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo3]], i32 [[Hi3]]) - ; CHECK: [[Vec3:%.*]] = insertelement <4 x double> [[Vec2]], double [[DBL3]], i32 2 - ; CHECK: [[DBL4:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo4]], i32 [[Hi4]]) - ; CHECK: [[Vec4:%.*]] = insertelement <4 x double> [[Vec3]], double [[DBL4]], i32 3 + ; CHECK62: [[L2:%.*]] = call { <4 x i32>, i1 } + ; CHECK62-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v4f64_0_0t( + ; CHECK62-SAME: target("dx.Rawbuffer", <4 x double>, 0, 0) [[B]], i32 %index, i32 16) + + ; CHECK62: [[D2:%.*]] = extractvalue { <4 x i32>, i1 } [[L2]], 0 + ; CHECK62: [[Lo3:%.*]] = extractelement <4 x i32> [[D2]], i32 0 + ; CHECK62: [[Hi3:%.*]] = extractelement <4 x i32> [[D2]], i32 1 + ; CHECK62: [[Lo4:%.*]] = extractelement <4 x i32> [[D2]], i32 2 + ; CHECK62: [[Hi4:%.*]] = extractelement <4 x i32> [[D2]], i32 3 + ; CHECK62: [[DBL3:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo3]], i32 [[Hi3]]) + ; CHECK62: [[Vec3:%.*]] = insertelement <4 x double> [[Vec2]], double [[DBL3]], i32 2 + ; CHECK62: [[DBL4:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo4]], i32 [[Hi4]]) + ; CHECK62: [[Vec4:%.*]] = insertelement <4 x double> [[Vec3]], double [[DBL4]], i32 3 %load0 = call {<4 x double>, i1} @llvm.dx.resource.load.rawbuffer( target("dx.Rawbuffer", <4 x double>, 0, 0) %buffer, i32 %index, i32 0) + ; CHECK63: extractvalue { <4 x double>, i1 } [[L0]], 0 + ; CHECK63: extractvalue { <4 x double>, i1 } [[L0]], 1 - ; CHECK-NOT: extractvalue {<4 x double>, i1 } + ; CHECK62-NOT: extractvalue {<4 x double>, i1 } %data0 = extractvalue {<4 x double>, i1} %load0, 0 ; check we extract checkbit from both loads and and them together - ; CHECK: [[B1:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 1 - ; CHECK: [[B2:%.*]] = extractvalue { <4 x i32>, i1 } [[L2]], 1 - ; CHECK: and i1 [[B1]], [[B2]] + ; CHECK62: [[B1:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 1 + ; CHECK62: [[B2:%.*]] = extractvalue { <4 x i32>, i1 } [[L2]], 1 + ; CHECK62: and i1 [[B1]], [[B2]] %cb = extractvalue {<4 x double>, i1} %load0, 1 ret void } diff --git a/llvm/test/CodeGen/DirectX/RawBufferLoadInt64.ll b/llvm/test/CodeGen/DirectX/RawBufferLoadInt64.ll index 2ed939fd48226..a1c153f2c0c84 100644 --- a/llvm/test/CodeGen/DirectX/RawBufferLoadInt64.ll +++ b/llvm/test/CodeGen/DirectX/RawBufferLoadInt64.ll @@ -1,6 +1,5 @@ -; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s - -target triple = "dxil-pc-shadermodel6.2-compute" +; RUN: opt -mtriple=dxil-pc-shadermodel6.2-compute -S -dxil-intrinsic-expansion %s | FileCheck %s --check-prefixes=CHECK,CHECK62 +; RUN: opt -mtriple=dxil-pc-shadermodel6.3-compute -S -dxil-intrinsic-expansion %s | FileCheck %s --check-prefixes=CHECK,CHECK63 define void @loadi64(i32 %index) { ; check the handle from binding is unchanged @@ -11,23 +10,29 @@ 
define void @loadi64(i32 %index) { @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_i64_1_0_0t( i32 0, i32 1, i32 1, i32 0, i1 false, ptr null) + ; check we don't modify the code in sm6.3 or later + ; CHECK63: [[L0:%.*]] = call { i64, i1 } @llvm.dx.resource.load.rawbuffer + ; CHECK63-SAME: target("dx.Rawbuffer", i64, 1, 0, 0) [[B]], i32 %index, i32 0) + ; check we load an <2 x i32> instead of a i64 - ; CHECK-NOT: call {i64, i1} @llvm.dx.resource.load.rawbuffer - ; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 } - ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v2i32.tdx.Rawbuffer_i64_1_0_0t( - ; CHECK-SAME: target("dx.Rawbuffer", i64, 1, 0, 0) [[B]], i32 %index, i32 0) + ; CHECK62-NOT: call {i64, i1} @llvm.dx.resource.load.rawbuffer + ; CHECK62: [[L0:%.*]] = call { <2 x i32>, i1 } + ; CHECK62-SAME: @llvm.dx.resource.load.rawbuffer.v2i32.tdx.Rawbuffer_i64_1_0_0t( + ; CHECK62-SAME: target("dx.Rawbuffer", i64, 1, 0, 0) [[B]], i32 %index, i32 0) %load0 = call {i64, i1} @llvm.dx.resource.load.rawbuffer( target("dx.Rawbuffer", i64, 1, 0, 0) %buffer, i32 %index, i32 0) + ; CHECK63: extractvalue { i64, i1 } [[L0]], 0 + ; check we extract the two i32 and construct a i64 - ; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0 - ; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i32 0 - ; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i32 1 - ; CHECK: [[ZLo1:%.*]] = zext i32 [[Lo]] to i64 - ; CHECK: [[ZHi1:%.*]] = zext i32 [[Hi]] to i64 - ; CHECK: [[A:%.*]] = shl i64 [[ZHi1]], 32 - ; CHECK: [[B:%.*]] = or i64 [[ZLo1]], [[A]] - ; CHECK-NOT: extractvalue { i64, i1 } + ; CHECK62: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0 + ; CHECK62: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i32 0 + ; CHECK62: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i32 1 + ; CHECK62: [[ZLo1:%.*]] = zext i32 [[Lo]] to i64 + ; CHECK62: [[ZHi1:%.*]] = zext i32 [[Hi]] to i64 + ; CHECK62: [[A:%.*]] = shl i64 [[ZHi1]], 32 + ; CHECK62: [[B:%.*]] = or i64 [[ZLo1]], [[A]] + ; CHECK62-NOT: extractvalue { i64, i1 } %data0 = extractvalue {i64, i1} %load0, 0 ret void } @@ -41,30 +46,36 @@ define void @loadv2i64(i32 %index) { @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v2i64_1_0_0t( i32 0, i32 1, i32 1, i32 0, i1 false, ptr null) + ; check we don't modify the code in sm6.3 or later + ; CHECK63: [[L0:%.*]] = call { <2 x i64>, i1 } @llvm.dx.resource.load.rawbuffer + ; CHECK63-SAME: target("dx.Rawbuffer", <2 x i64>, 1, 0, 0) [[B]], i32 %index, i32 0) + ; check we load an <4 x i32> instead of a i642 - ; CHECK: [[L0:%.*]] = call { <4 x i32>, i1 } - ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v2i64_1_0_0t( - ; CHECK-SAME: target("dx.Rawbuffer", <2 x i64>, 1, 0, 0) [[B]], i32 %index, i32 0) + ; CHECK62: [[L0:%.*]] = call { <4 x i32>, i1 } + ; CHECK62-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v2i64_1_0_0t( + ; CHECK62-SAME: target("dx.Rawbuffer", <2 x i64>, 1, 0, 0) [[B]], i32 %index, i32 0) %load0 = call { <2 x i64>, i1 } @llvm.dx.resource.load.rawbuffer( target("dx.Rawbuffer", <2 x i64>, 1, 0, 0) %buffer, i32 %index, i32 0) + ; CHECK63: extractvalue { <2 x i64>, i1 } [[L0]], 0 + ; check we extract the 4 i32 and construct a <2 x i64> - ; CHECK: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0 - ; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0 - ; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1 - ; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2 - ; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3 - ; CHECK: [[ZLo1:%.*]] = 
zext i32 [[Lo1]] to i64 - ; CHECK: [[ZHi1:%.*]] = zext i32 [[Hi1]] to i64 - ; CHECK: [[A:%.*]] = shl i64 [[ZHi1]], 32 - ; CHECK: [[B:%.*]] = or i64 [[ZLo1]], [[A]] - ; CHECK: [[Vec:%.*]] = insertelement <2 x i64> poison, i64 [[B]], i32 0 - ; CHECK: [[ZLo2:%.*]] = zext i32 [[Lo2]] to i64 - ; CHECK: [[ZHi2:%.*]] = zext i32 [[Hi2]] to i64 - ; CHECK: [[A:%.*]] = shl i64 [[ZHi2]], 32 - ; CHECK: [[B:%.*]] = or i64 [[ZLo2]], [[A]] - ; CHECK: [[Vec2:%.*]] = insertelement <2 x i64> [[Vec]], i64 [[B]], i32 1 - ; CHECK-NOT: extractvalue { <2 x i64>, i1 } + ; CHECK62: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0 + ; CHECK62: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0 + ; CHECK62: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1 + ; CHECK62: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2 + ; CHECK62: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3 + ; CHECK62: [[ZLo1:%.*]] = zext i32 [[Lo1]] to i64 + ; CHECK62: [[ZHi1:%.*]] = zext i32 [[Hi1]] to i64 + ; CHECK62: [[A:%.*]] = shl i64 [[ZHi1]], 32 + ; CHECK62: [[B:%.*]] = or i64 [[ZLo1]], [[A]] + ; CHECK62: [[Vec:%.*]] = insertelement <2 x i64> poison, i64 [[B]], i32 0 + ; CHECK62: [[ZLo2:%.*]] = zext i32 [[Lo2]] to i64 + ; CHECK62: [[ZHi2:%.*]] = zext i32 [[Hi2]] to i64 + ; CHECK62: [[A:%.*]] = shl i64 [[ZHi2]], 32 + ; CHECK62: [[B:%.*]] = or i64 [[ZLo2]], [[A]] + ; CHECK62: [[Vec2:%.*]] = insertelement <2 x i64> [[Vec]], i64 [[B]], i32 1 + ; CHECK62-NOT: extractvalue { <2 x i64>, i1 } %data0 = extractvalue { <2 x i64>, i1 } %load0, 0 ret void } @@ -79,25 +90,32 @@ define void @loadi64WithCheckBit(i32 %index) { @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_i64_1_0_0t( i32 0, i32 1, i32 1, i32 0, i1 false, ptr null) + ; check we don't modify the code in sm6.3 or later + ; CHECK63: [[L0:%.*]] = call { i64, i1 } @llvm.dx.resource.load.rawbuffer + ; CHECK63-SAME: target("dx.Rawbuffer", i64, 1, 0, 0) [[B]], i32 %index, i32 0) + ; check we load an <2 x i32> instead of a i64 - ; CHECK-NOT: call {i64, i1} @llvm.dx.resource.load.rawbuffer - ; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 } - ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v2i32.tdx.Rawbuffer_i64_1_0_0t( - ; CHECK-SAME: target("dx.Rawbuffer", i64, 1, 0, 0) [[B]], i32 %index, i32 0) + ; CHECK62-NOT: call {i64, i1} @llvm.dx.resource.load.rawbuffer + ; CHECK62: [[L0:%.*]] = call { <2 x i32>, i1 } + ; CHECK62-SAME: @llvm.dx.resource.load.rawbuffer.v2i32.tdx.Rawbuffer_i64_1_0_0t( + ; CHECK62-SAME: target("dx.Rawbuffer", i64, 1, 0, 0) [[B]], i32 %index, i32 0) %load0 = call {i64, i1} @llvm.dx.resource.load.rawbuffer( target("dx.Rawbuffer", i64, 1, 0, 0) %buffer, i32 %index, i32 0) + ; CHECK63: extractvalue { i64, i1 } [[L0]], 0 + ; CHECK63: extractvalue { i64, i1 } [[L0]], 1 + ; check we extract the two i32 and construct a i64 - ; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0 - ; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i32 0 - ; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i32 1 - ; CHECK: [[ZLo1:%.*]] = zext i32 [[Lo]] to i64 - ; CHECK: [[ZHi1:%.*]] = zext i32 [[Hi]] to i64 - ; CHECK: [[A:%.*]] = shl i64 [[ZHi1]], 32 - ; CHECK: [[B:%.*]] = or i64 [[ZLo1]], [[A]] + ; CHECK62: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0 + ; CHECK62: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i32 0 + ; CHECK62: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i32 1 + ; CHECK62: [[ZLo1:%.*]] = zext i32 [[Lo]] to i64 + ; CHECK62: [[ZHi1:%.*]] = zext i32 [[Hi]] to i64 + ; CHECK62: [[A:%.*]] = shl i64 [[ZHi1]], 32 + ; CHECK62: [[B:%.*]] = or i64 
[[ZLo1]], [[A]] %data0 = extractvalue {i64, i1} %load0, 0 - ; CHECK: extractvalue { <2 x i32>, i1 } [[L0]], 1 - ; CHECK-NOT: extractvalue { i64, i1 } + ; CHECK62: extractvalue { <2 x i32>, i1 } [[L0]], 1 + ; CHECK62-NOT: extractvalue { i64, i1 } %cb = extractvalue {i64, i1} %load0, 1 ret void } @@ -115,118 +133,130 @@ define void @loadv3i64(i32 %index) { @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v3i64_0_0t( i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + ; check we don't modify the code in sm6.3 or later + ; CHECK63: [[L0:%.*]] = call { <3 x i64>, i1 } @llvm.dx.resource.load.rawbuffer + ; CHECK63-SAME: target("dx.Rawbuffer", <3 x i64>, 0, 0) [[B]], i32 %index, i32 0) + ; check we perform two loads ; and do 6 extracts and construct 3 i64s - ; CHECK-NOT: call {<3 x i64>, i1} @llvm.dx.resource.load.rawbuffer - ; CHECK: [[L0:%.*]] = call { <4 x i32>, i1 } - ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v3i64_0_0t( - ; CHECK-SAME: target("dx.Rawbuffer", <3 x i64>, 0, 0) [[Buf]], i32 %index, i32 0) - - ; CHECK: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0 - ; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0 - ; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1 - ; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2 - ; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3 - ; CHECK: [[ZLo1:%.*]] = zext i32 [[Lo1]] to i64 - ; CHECK: [[ZHi1:%.*]] = zext i32 [[Hi1]] to i64 - ; CHECK: [[A:%.*]] = shl i64 [[ZHi1]], 32 - ; CHECK: [[B:%.*]] = or i64 [[ZLo1]], [[A]] - ; CHECK: [[Vec1:%.*]] = insertelement <3 x i64> poison, i64 [[B]], i32 0 - ; CHECK: [[ZLo2:%.*]] = zext i32 [[Lo2]] to i64 - ; CHECK: [[ZHi2:%.*]] = zext i32 [[Hi2]] to i64 - ; CHECK: [[A:%.*]] = shl i64 [[ZHi2]], 32 - ; CHECK: [[B:%.*]] = or i64 [[ZLo2]], [[A]] - ; CHECK: [[Vec2:%.*]] = insertelement <3 x i64> [[Vec1]], i64 [[B]], i32 1 + ; CHECK62-NOT: call {<3 x i64>, i1} @llvm.dx.resource.load.rawbuffer + ; CHECK62: [[L0:%.*]] = call { <4 x i32>, i1 } + ; CHECK62-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v3i64_0_0t( + ; CHECK62-SAME: target("dx.Rawbuffer", <3 x i64>, 0, 0) [[Buf]], i32 %index, i32 0) + + ; CHECK62: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0 + ; CHECK62: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0 + ; CHECK62: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1 + ; CHECK62: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2 + ; CHECK62: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3 + ; CHECK62: [[ZLo1:%.*]] = zext i32 [[Lo1]] to i64 + ; CHECK62: [[ZHi1:%.*]] = zext i32 [[Hi1]] to i64 + ; CHECK62: [[A:%.*]] = shl i64 [[ZHi1]], 32 + ; CHECK62: [[B:%.*]] = or i64 [[ZLo1]], [[A]] + ; CHECK62: [[Vec1:%.*]] = insertelement <3 x i64> poison, i64 [[B]], i32 0 + ; CHECK62: [[ZLo2:%.*]] = zext i32 [[Lo2]] to i64 + ; CHECK62: [[ZHi2:%.*]] = zext i32 [[Hi2]] to i64 + ; CHECK62: [[A:%.*]] = shl i64 [[ZHi2]], 32 + ; CHECK62: [[B:%.*]] = or i64 [[ZLo2]], [[A]] + ; CHECK62: [[Vec2:%.*]] = insertelement <3 x i64> [[Vec1]], i64 [[B]], i32 1 ; 2nd load - ; CHECK: [[L2:%.*]] = call { <2 x i32>, i1 } - ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v2i32.tdx.Rawbuffer_v3i64_0_0t( - ; CHECK-SAME: target("dx.Rawbuffer", <3 x i64>, 0, 0) [[Buf]], i32 %index, i32 16) - - ; CHECK: [[D2:%.*]] = extractvalue { <2 x i32>, i1 } [[L2]], 0 - ; CHECK: [[Lo3:%.*]] = extractelement <2 x i32> [[D2]], i32 0 - ; CHECK: [[Hi3:%.*]] = extractelement <2 x i32> [[D2]], i32 1 - ; CHECK: [[ZLo3:%.*]] = zext i32 [[Lo3]] to i64 - ; CHECK: 
[[ZHi3:%.*]] = zext i32 [[Hi3]] to i64 - ; CHECK: [[A:%.*]] = shl i64 [[ZHi3]], 32 - ; CHECK: [[B:%.*]] = or i64 [[ZLo3]], [[A]] - ; CHECK: [[Vec3:%.*]] = insertelement <3 x i64> [[Vec2]], i64 [[B]], i32 2 + ; CHECK62: [[L2:%.*]] = call { <2 x i32>, i1 } + ; CHECK62-SAME: @llvm.dx.resource.load.rawbuffer.v2i32.tdx.Rawbuffer_v3i64_0_0t( + ; CHECK62-SAME: target("dx.Rawbuffer", <3 x i64>, 0, 0) [[Buf]], i32 %index, i32 16) + + ; CHECK62: [[D2:%.*]] = extractvalue { <2 x i32>, i1 } [[L2]], 0 + ; CHECK62: [[Lo3:%.*]] = extractelement <2 x i32> [[D2]], i32 0 + ; CHECK62: [[Hi3:%.*]] = extractelement <2 x i32> [[D2]], i32 1 + ; CHECK62: [[ZLo3:%.*]] = zext i32 [[Lo3]] to i64 + ; CHECK62: [[ZHi3:%.*]] = zext i32 [[Hi3]] to i64 + ; CHECK62: [[A:%.*]] = shl i64 [[ZHi3]], 32 + ; CHECK62: [[B:%.*]] = or i64 [[ZLo3]], [[A]] + ; CHECK62: [[Vec3:%.*]] = insertelement <3 x i64> [[Vec2]], i64 [[B]], i32 2 %load0 = call {<3 x i64>, i1} @llvm.dx.resource.load.rawbuffer( target("dx.Rawbuffer", <3 x i64>, 0, 0) %buffer, i32 %index, i32 0) + ; CHECK63: extractvalue { <3 x i64>, i1 } [[L0]], 0 + ; CHECK63: extractvalue { <3 x i64>, i1 } [[L0]], 1 - ; CHECK-NOT: extractvalue {<3 x i64>, i1 } + ; CHECK62-NOT: extractvalue {<3 x i64>, i1 } %data0 = extractvalue {<3 x i64>, i1} %load0, 0 ; check we extract checkbit from both loads and and them together - ; CHECK: [[B1:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 1 - ; CHECK: [[B2:%.*]] = extractvalue { <2 x i32>, i1 } [[L2]], 1 - ; CHECK: and i1 [[B1]], [[B2]] + ; CHECK62: [[B1:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 1 + ; CHECK62: [[B2:%.*]] = extractvalue { <2 x i32>, i1 } [[L2]], 1 + ; CHECK62: and i1 [[B1]], [[B2]] %cb = extractvalue {<3 x i64>, i1} %load0, 1 ret void } define void @loadv4i64(i32 %index) { ; check the handle from binding is unchanged - ; CHECK: [[Buf:%.*]] = call target("dx.Rawbuffer", <4 x i64>, 0, 0) - ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v4i64_0_0t( - ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + ; CHECK62: [[Buf:%.*]] = call target("dx.Rawbuffer", <4 x i64>, 0, 0) + ; CHECK62-SAME: @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v4i64_0_0t( + ; CHECK62-SAME: i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) %buffer = call target("dx.Rawbuffer", <4 x i64>, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v4i64_0_0t( i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + ; check we don't modify the code in sm6.3 or later + ; CHECK63: [[L0:%.*]] = call { <4 x i64>, i1 } @llvm.dx.resource.load.rawbuffer + ; CHECK63-SAME: target("dx.Rawbuffer", <4 x i64>, 0, 0) [[B]], i32 %index, i32 0) + ; check we perform two loads ; and do 8 extracts and construct 4 i64s - ; CHECK-NOT: call {<4 x i64>, i1} @llvm.dx.resource.load.rawbuffer - ; CHECK: [[L0:%.*]] = call { <4 x i32>, i1 } - ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v4i64_0_0t( - ; CHECK-SAME: target("dx.Rawbuffer", <4 x i64>, 0, 0) [[Buf]], i32 %index, i32 0) - - ; CHECK: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0 - ; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0 - ; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1 - ; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2 - ; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3 - ; CHECK: [[ZLo1:%.*]] = zext i32 [[Lo1]] to i64 - ; CHECK: [[ZHi1:%.*]] = zext i32 [[Hi1]] to i64 - ; CHECK: [[A:%.*]] = shl i64 [[ZHi1]], 32 - ; CHECK: [[B:%.*]] = or i64 [[ZLo1]], [[A]] - ; CHECK: [[Vec1:%.*]] = insertelement <4 x i64> 
poison, i64 [[B]], i32 0 - ; CHECK: [[ZLo2:%.*]] = zext i32 [[Lo2]] to i64 - ; CHECK: [[ZHi2:%.*]] = zext i32 [[Hi2]] to i64 - ; CHECK: [[A:%.*]] = shl i64 [[ZHi2]], 32 - ; CHECK: [[B:%.*]] = or i64 [[ZLo2]], [[A]] - ; CHECK: [[Vec2:%.*]] = insertelement <4 x i64> [[Vec1]], i64 [[B]], i32 1 + ; CHECK62-NOT: call {<4 x i64>, i1} @llvm.dx.resource.load.rawbuffer + ; CHECK62: [[L0:%.*]] = call { <4 x i32>, i1 } + ; CHECK62-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v4i64_0_0t( + ; CHECK62-SAME: target("dx.Rawbuffer", <4 x i64>, 0, 0) [[Buf]], i32 %index, i32 0) + + ; CHECK62: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0 + ; CHECK62: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0 + ; CHECK62: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1 + ; CHECK62: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2 + ; CHECK62: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3 + ; CHECK62: [[ZLo1:%.*]] = zext i32 [[Lo1]] to i64 + ; CHECK62: [[ZHi1:%.*]] = zext i32 [[Hi1]] to i64 + ; CHECK62: [[A:%.*]] = shl i64 [[ZHi1]], 32 + ; CHECK62: [[B:%.*]] = or i64 [[ZLo1]], [[A]] + ; CHECK62: [[Vec1:%.*]] = insertelement <4 x i64> poison, i64 [[B]], i32 0 + ; CHECK62: [[ZLo2:%.*]] = zext i32 [[Lo2]] to i64 + ; CHECK62: [[ZHi2:%.*]] = zext i32 [[Hi2]] to i64 + ; CHECK62: [[A:%.*]] = shl i64 [[ZHi2]], 32 + ; CHECK62: [[B:%.*]] = or i64 [[ZLo2]], [[A]] + ; CHECK62: [[Vec2:%.*]] = insertelement <4 x i64> [[Vec1]], i64 [[B]], i32 1 ; 2nd load - ; CHECK: [[L2:%.*]] = call { <4 x i32>, i1 } - ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v4i64_0_0t( - ; CHECK-SAME: target("dx.Rawbuffer", <4 x i64>, 0, 0) [[Buf]], i32 %index, i32 16) - - ; CHECK: [[D2:%.*]] = extractvalue { <4 x i32>, i1 } [[L2]], 0 - ; CHECK: [[Lo3:%.*]] = extractelement <4 x i32> [[D2]], i32 0 - ; CHECK: [[Hi3:%.*]] = extractelement <4 x i32> [[D2]], i32 1 - ; CHECK: [[Lo4:%.*]] = extractelement <4 x i32> [[D2]], i32 2 - ; CHECK: [[Hi4:%.*]] = extractelement <4 x i32> [[D2]], i32 3 - ; CHECK: [[ZLo3:%.*]] = zext i32 [[Lo3]] to i64 - ; CHECK: [[ZHi3:%.*]] = zext i32 [[Hi3]] to i64 - ; CHECK: [[A:%.*]] = shl i64 [[ZHi3]], 32 - ; CHECK: [[B:%.*]] = or i64 [[ZLo3]], [[A]] - ; CHECK: [[Vec3:%.*]] = insertelement <4 x i64> [[Vec2]], i64 [[B]], i32 2 - ; CHECK: [[ZLo4:%.*]] = zext i32 [[Lo4]] to i64 - ; CHECK: [[ZHi4:%.*]] = zext i32 [[Hi4]] to i64 - ; CHECK: [[A:%.*]] = shl i64 [[ZHi4]], 32 - ; CHECK: [[B:%.*]] = or i64 [[ZLo4]], [[A]] - ; CHECK: [[Vec4:%.*]] = insertelement <4 x i64> [[Vec3]], i64 [[B]], i32 3 + ; CHECK62: [[L2:%.*]] = call { <4 x i32>, i1 } + ; CHECK62-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v4i64_0_0t( + ; CHECK62-SAME: target("dx.Rawbuffer", <4 x i64>, 0, 0) [[Buf]], i32 %index, i32 16) + + ; CHECK62: [[D2:%.*]] = extractvalue { <4 x i32>, i1 } [[L2]], 0 + ; CHECK62: [[Lo3:%.*]] = extractelement <4 x i32> [[D2]], i32 0 + ; CHECK62: [[Hi3:%.*]] = extractelement <4 x i32> [[D2]], i32 1 + ; CHECK62: [[Lo4:%.*]] = extractelement <4 x i32> [[D2]], i32 2 + ; CHECK62: [[Hi4:%.*]] = extractelement <4 x i32> [[D2]], i32 3 + ; CHECK62: [[ZLo3:%.*]] = zext i32 [[Lo3]] to i64 + ; CHECK62: [[ZHi3:%.*]] = zext i32 [[Hi3]] to i64 + ; CHECK62: [[A:%.*]] = shl i64 [[ZHi3]], 32 + ; CHECK62: [[B:%.*]] = or i64 [[ZLo3]], [[A]] + ; CHECK62: [[Vec3:%.*]] = insertelement <4 x i64> [[Vec2]], i64 [[B]], i32 2 + ; CHECK62: [[ZLo4:%.*]] = zext i32 [[Lo4]] to i64 + ; CHECK62: [[ZHi4:%.*]] = zext i32 [[Hi4]] to i64 + ; CHECK62: [[A:%.*]] = shl i64 [[ZHi4]], 32 + ; CHECK62: [[B:%.*]] = or 
i64 [[ZLo4]], [[A]] + ; CHECK62: [[Vec4:%.*]] = insertelement <4 x i64> [[Vec3]], i64 [[B]], i32 3 %load0 = call {<4 x i64>, i1} @llvm.dx.resource.load.rawbuffer( target("dx.Rawbuffer", <4 x i64>, 0, 0) %buffer, i32 %index, i32 0) + ; CHECK63: extractvalue { <4 x i64>, i1 } [[L0]], 0 + ; CHECK63: extractvalue { <4 x i64>, i1 } [[L0]], 1 - ; CHECK-NOT: extractvalue {<4 x i64>, i1 } + ; CHECK62-NOT: extractvalue {<4 x i64>, i1 } %data0 = extractvalue {<4 x i64>, i1} %load0, 0 ; check we extract checkbit from both loads and and them together - ; CHECK: [[B1:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 1 - ; CHECK: [[B2:%.*]] = extractvalue { <4 x i32>, i1 } [[L2]], 1 - ; CHECK: and i1 [[B1]], [[B2]] + ; CHECK62: [[B1:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 1 + ; CHECK62: [[B2:%.*]] = extractvalue { <4 x i32>, i1 } [[L2]], 1 + ; CHECK62: and i1 [[B1]], [[B2]] %cb = extractvalue {<4 x i64>, i1} %load0, 1 ret void } diff --git a/llvm/test/CodeGen/DirectX/RawBufferStoreDouble.ll b/llvm/test/CodeGen/DirectX/RawBufferStoreDouble.ll index 7af0117842c03..ddcd761d812fa 100644 --- a/llvm/test/CodeGen/DirectX/RawBufferStoreDouble.ll +++ b/llvm/test/CodeGen/DirectX/RawBufferStoreDouble.ll @@ -1,6 +1,5 @@ -; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s - -target triple = "dxil-pc-shadermodel6.2-compute" +; RUN: opt -mtriple=dxil-pc-shadermodel6.2-compute -S -dxil-intrinsic-expansion %s | FileCheck %s --check-prefixes=CHECK,CHECK62 +; RUN: opt -mtriple=dxil-pc-shadermodel6.3-compute -S -dxil-intrinsic-expansion %s | FileCheck %s --check-prefixes=CHECK,CHECK63 define void @storef64(double %0, i32 %index) { ; CHECK: [[B:%.*]] = tail call target("dx.RawBuffer", double, 1, 0) @@ -10,14 +9,18 @@ define void @storef64(double %0, i32 %index) { @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_f64_1_0t( i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + ; check we don't modify the code in sm6.3 or later + ; CHECK63: call void @llvm.dx.resource.store.rawbuffer + ; CHECK63-SAME: target("dx.RawBuffer", double, 1, 0) [[B]], i32 %index, i32 0, double %0) + ; check we split the double and store the lo and hi bits - ; CHECK: [[SD:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double %0) - ; CHECK: [[Lo:%.*]] = extractvalue { i32, i32 } [[SD]], 0 - ; CHECK: [[Hi:%.*]] = extractvalue { i32, i32 } [[SD]], 1 - ; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[Lo]], i32 0 - ; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[Hi]], i32 1 - ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f64_1_0t.v2i32( - ; CHECK-SAME: target("dx.RawBuffer", double, 1, 0) [[B]], i32 %index, i32 0, <2 x i32> [[Vec2]]) + ; CHECK62: [[SD:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double %0) + ; CHECK62: [[Lo:%.*]] = extractvalue { i32, i32 } [[SD]], 0 + ; CHECK62: [[Hi:%.*]] = extractvalue { i32, i32 } [[SD]], 1 + ; CHECK62: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[Lo]], i32 0 + ; CHECK62: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[Hi]], i32 1 + ; CHECK62: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f64_1_0t.v2i32( + ; CHECK62-SAME: target("dx.RawBuffer", double, 1, 0) [[B]], i32 %index, i32 0, <2 x i32> [[Vec2]]) call void @llvm.dx.resource.store.rawbuffer( target("dx.RawBuffer", double, 1, 0) %buffer, i32 %index, i32 0, double %0) @@ -32,13 +35,17 @@ define void @storev2f64(<2 x double> %0, i32 %index) { @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v2f64_1_0t( i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) - ; CHECK: [[SD:%.*]] = 
call { <2 x i32>, <2 x i32> } - ; CHECK-SAME: @llvm.dx.splitdouble.v2i32(<2 x double> %0) - ; CHECK: [[Lo:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 0 - ; CHECK: [[Hi:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 1 - ; CHECK: [[Vec:%.*]] = shufflevector <2 x i32> [[Lo]], <2 x i32> [[Hi]], <4 x i32> - ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v2f64_1_0t.v4i32( - ; CHECK-SAME: target("dx.RawBuffer", <2 x double>, 1, 0) [[B]], i32 %index, i32 0, <4 x i32> [[Vec]]) + ; check we don't modify the code in sm6.3 or later + ; CHECK63: call void @llvm.dx.resource.store.rawbuffer + ; CHECK63-SAME: target("dx.RawBuffer", <2 x double>, 1, 0) [[B]], i32 %index, i32 0, <2 x double> %0) + + ; CHECK62: [[SD:%.*]] = call { <2 x i32>, <2 x i32> } + ; CHECK62-SAME: @llvm.dx.splitdouble.v2i32(<2 x double> %0) + ; CHECK62: [[Lo:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 0 + ; CHECK62: [[Hi:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 1 + ; CHECK62: [[Vec:%.*]] = shufflevector <2 x i32> [[Lo]], <2 x i32> [[Hi]], <4 x i32> + ; CHECK62: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v2f64_1_0t.v4i32( + ; CHECK62-SAME: target("dx.RawBuffer", <2 x double>, 1, 0) [[B]], i32 %index, i32 0, <4 x i32> [[Vec]]) call void @llvm.dx.resource.store.rawbuffer( target("dx.RawBuffer", <2 x double>, 1, 0) %buffer, i32 %index, i32 0, <2 x double> %0) @@ -53,14 +60,18 @@ define void @storev3f64(<3 x double> %0, i32 %index) { @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v3f64_1_0t( i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) - ; CHECK: [[A:%.*]] = call { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double> %0) - ; CHECK: [[B:%.*]] = extractvalue { <3 x i32>, <3 x i32> } [[A]], 0 - ; CHECK: [[C:%.*]] = extractvalue { <3 x i32>, <3 x i32> } [[A]], 1 - ; CHECK: [[D:%.*]] = shufflevector <3 x i32> [[B]], <3 x i32> [[C]], <6 x i32> - ; CHECK: [[E:%.*]] = shufflevector <6 x i32> [[D]], <6 x i32> poison, <4 x i32> - ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v3f64_1_0t.v4i32(target("dx.RawBuffer", <3 x double>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i32> [[E]]) - ; CHECK: [[F:%.*]] = shufflevector <6 x i32> [[D]], <6 x i32> poison, <2 x i32> - ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v3f64_1_0t.v2i32(target("dx.RawBuffer", <3 x double>, 1, 0) [[Buf]], i32 %index, i32 16, <2 x i32> [[F]]) + ; check we don't modify the code in sm6.3 or later + ; CHECK63: call void @llvm.dx.resource.store.rawbuffer + ; CHECK63-SAME: target("dx.RawBuffer", <3 x double>, 1, 0) [[B]], i32 %index, i32 0, <3 x double> %0) + + ; CHECK62: [[A:%.*]] = call { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double> %0) + ; CHECK62: [[B:%.*]] = extractvalue { <3 x i32>, <3 x i32> } [[A]], 0 + ; CHECK62: [[C:%.*]] = extractvalue { <3 x i32>, <3 x i32> } [[A]], 1 + ; CHECK62: [[D:%.*]] = shufflevector <3 x i32> [[B]], <3 x i32> [[C]], <6 x i32> + ; CHECK62: [[E:%.*]] = shufflevector <6 x i32> [[D]], <6 x i32> poison, <4 x i32> + ; CHECK62: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v3f64_1_0t.v4i32(target("dx.RawBuffer", <3 x double>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i32> [[E]]) + ; CHECK62: [[F:%.*]] = shufflevector <6 x i32> [[D]], <6 x i32> poison, <2 x i32> + ; CHECK62: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v3f64_1_0t.v2i32(target("dx.RawBuffer", <3 x double>, 1, 0) [[Buf]], i32 %index, i32 16, <2 x i32> [[F]]) call void @llvm.dx.resource.store.rawbuffer( target("dx.RawBuffer", 
<3 x double>, 1, 0) %buffer, i32 %index, i32 0, <3 x double> %0) @@ -75,14 +86,18 @@ define void @storev4f64(<4 x double> %0, i32 %index) { @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v4f64_1_0t( i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) - ; CHECK: [[A:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.dx.splitdouble.v4i32(<4 x double> %0) - ; CHECK: [[B:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[A]], 0 - ; CHECK: [[C:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[A]], 1 - ; CHECK: [[D:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[C]], <8 x i32> - ; CHECK: [[E:%.*]] = shufflevector <8 x i32> [[D]], <8 x i32> poison, <4 x i32> - ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v4f64_1_0t.v4i32(target("dx.RawBuffer", <4 x double>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i32> [[E]]) - ; CHECK: [[F:%.*]] = shufflevector <8 x i32> [[D]], <8 x i32> poison, <4 x i32> - ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v4f64_1_0t.v4i32(target("dx.RawBuffer", <4 x double>, 1, 0) [[Buf]], i32 %index, i32 16, <4 x i32> [[F]]) + ; check we don't modify the code in sm6.3 or later + ; CHECK63: call void @llvm.dx.resource.store.rawbuffer + ; CHECK63-SAME: target("dx.RawBuffer", <4 x double>, 1, 0) [[B]], i32 %index, i32 0, <4 x double> %0) + + ; CHECK62: [[A:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.dx.splitdouble.v4i32(<4 x double> %0) + ; CHECK62: [[B:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[A]], 0 + ; CHECK62: [[C:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[A]], 1 + ; CHECK62: [[D:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[C]], <8 x i32> + ; CHECK62: [[E:%.*]] = shufflevector <8 x i32> [[D]], <8 x i32> poison, <4 x i32> + ; CHECK62: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v4f64_1_0t.v4i32(target("dx.RawBuffer", <4 x double>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i32> [[E]]) + ; CHECK62: [[F:%.*]] = shufflevector <8 x i32> [[D]], <8 x i32> poison, <4 x i32> + ; CHECK62: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v4f64_1_0t.v4i32(target("dx.RawBuffer", <4 x double>, 1, 0) [[Buf]], i32 %index, i32 16, <4 x i32> [[F]]) call void @llvm.dx.resource.store.rawbuffer( target("dx.RawBuffer", <4 x double>, 1, 0) %buffer, i32 %index, i32 0, <4 x double> %0) diff --git a/llvm/test/CodeGen/DirectX/RawBufferStoreInt64.ll b/llvm/test/CodeGen/DirectX/RawBufferStoreInt64.ll index 0cff8aaf8ac33..54ec4d2cd2fb7 100644 --- a/llvm/test/CodeGen/DirectX/RawBufferStoreInt64.ll +++ b/llvm/test/CodeGen/DirectX/RawBufferStoreInt64.ll @@ -1,6 +1,5 @@ -; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s - -target triple = "dxil-pc-shadermodel6.2-compute" +; RUN: opt -mtriple=dxil-pc-shadermodel6.2-compute -S -dxil-intrinsic-expansion %s | FileCheck %s --check-prefixes=CHECK,CHECK62 +; RUN: opt -mtriple=dxil-pc-shadermodel6.3-compute -S -dxil-intrinsic-expansion %s | FileCheck %s --check-prefixes=CHECK,CHECK63 define void @storei64(i64 %0, i32 %index) { ; CHECK: [[Buf:%.*]] = tail call target("dx.RawBuffer", i64, 1, 0) @@ -10,14 +9,18 @@ define void @storei64(i64 %0, i32 %index) { @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i64_1_0t( i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + ; check we don't modify the code in sm6.3 or later + ; CHECK63: call void @llvm.dx.resource.store.rawbuffer + ; CHECK63-SAME: target("dx.RawBuffer", i64, 1, 0) [[Buf]], i32 %index, i32 0, i64 %0) + ; check we split the i64 and store the lo and hi bits - ; CHECK: [[A:%.*]] = trunc i64 %0 to i32 - ; CHECK: [[B:%.*]] = lshr i64 %0, 32 - ; 
CHECK: [[C:%.*]] = trunc i64 [[B]] to i32 - ; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i32 0 - ; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[C]], i32 1 - ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i64_1_0t.v2i32( - ; CHECK-SAME: target("dx.RawBuffer", i64, 1, 0) [[Buf]], i32 %index, i32 0, <2 x i32> [[Vec2]]) + ; CHECK62: [[A:%.*]] = trunc i64 %0 to i32 + ; CHECK62: [[B:%.*]] = lshr i64 %0, 32 + ; CHECK62: [[C:%.*]] = trunc i64 [[B]] to i32 + ; CHECK62: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i32 0 + ; CHECK62: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[C]], i32 1 + ; CHECK62: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i64_1_0t.v2i32( + ; CHECK62-SAME: target("dx.RawBuffer", i64, 1, 0) [[Buf]], i32 %index, i32 0, <2 x i32> [[Vec2]]) call void @llvm.dx.resource.store.rawbuffer( target("dx.RawBuffer", i64, 1, 0) %buffer, i32 %index, i32 0, i64 %0) @@ -32,12 +35,16 @@ define void @storev2i64(<2 x i64> %0, i32 %index) { @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v2i64_1_0t( i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) - ; CHECK: [[A:%.*]] = trunc <2 x i64> %0 to <2 x i32> - ; CHECK: [[B:%.*]] = lshr <2 x i64> %0, splat (i64 32) - ; CHECK: [[C:%.*]] = trunc <2 x i64> [[B]] to <2 x i32> - ; CHECK: [[Vec:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[C]], <4 x i32> - ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v2i64_1_0t.v4i32( - ; CHECK-SAME: target("dx.RawBuffer", <2 x i64>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i32> [[Vec]]) + ; check we don't modify the code in sm6.3 or later + ; CHECK63: call void @llvm.dx.resource.store.rawbuffer + ; CHECK63-SAME: target("dx.RawBuffer", <2 x i64>, 1, 0) [[Buf]], i32 %index, i32 0, <2 x i64> %0) + + ; CHECK62: [[A:%.*]] = trunc <2 x i64> %0 to <2 x i32> + ; CHECK62: [[B:%.*]] = lshr <2 x i64> %0, splat (i64 32) + ; CHECK62: [[C:%.*]] = trunc <2 x i64> [[B]] to <2 x i32> + ; CHECK62: [[Vec:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[C]], <4 x i32> + ; CHECK62: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v2i64_1_0t.v4i32( + ; CHECK62-SAME: target("dx.RawBuffer", <2 x i64>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i32> [[Vec]]) call void @llvm.dx.resource.store.rawbuffer( target("dx.RawBuffer", <2 x i64>, 1, 0) %buffer, i32 %index, i32 0, <2 x i64> %0) @@ -52,14 +59,18 @@ define void @storev3i64(<3 x i64> %0, i32 %index) { @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v3i64_1_0t( i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) - ; CHECK: [[A:%.*]] = trunc <3 x i64> %0 to <3 x i32> - ; CHECK: [[B:%.*]] = lshr <3 x i64> %0, splat (i64 32) - ; CHECK: [[C:%.*]] = trunc <3 x i64> [[B]] to <3 x i32> - ; CHECK: [[D:%.*]] = shufflevector <3 x i32> [[A]], <3 x i32> [[C]], <6 x i32> - ; CHECK: [[E:%.*]] = shufflevector <6 x i32> [[D]], <6 x i32> poison, <4 x i32> - ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v3i64_1_0t.v4i32(target("dx.RawBuffer", <3 x i64>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i32> [[E]]) - ; CHECK: [[F:%.*]] = shufflevector <6 x i32> [[D]], <6 x i32> poison, <2 x i32> - ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v3i64_1_0t.v2i32(target("dx.RawBuffer", <3 x i64>, 1, 0) [[Buf]], i32 %index, i32 16, <2 x i32> [[F]]) + ; check we don't modify the code in sm6.3 or later + ; CHECK63: call void @llvm.dx.resource.store.rawbuffer + ; CHECK63-SAME: target("dx.RawBuffer", <3 x i64>, 1, 0) [[Buf]], i32 %index, i32 0, <3 x i64> %0) + + ; CHECK62: 
[[A:%.*]] = trunc <3 x i64> %0 to <3 x i32> + ; CHECK62: [[B:%.*]] = lshr <3 x i64> %0, splat (i64 32) + ; CHECK62: [[C:%.*]] = trunc <3 x i64> [[B]] to <3 x i32> + ; CHECK62: [[D:%.*]] = shufflevector <3 x i32> [[A]], <3 x i32> [[C]], <6 x i32> + ; CHECK62: [[E:%.*]] = shufflevector <6 x i32> [[D]], <6 x i32> poison, <4 x i32> + ; CHECK62: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v3i64_1_0t.v4i32(target("dx.RawBuffer", <3 x i64>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i32> [[E]]) + ; CHECK62: [[F:%.*]] = shufflevector <6 x i32> [[D]], <6 x i32> poison, <2 x i32> + ; CHECK62: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v3i64_1_0t.v2i32(target("dx.RawBuffer", <3 x i64>, 1, 0) [[Buf]], i32 %index, i32 16, <2 x i32> [[F]]) call void @llvm.dx.resource.store.rawbuffer( target("dx.RawBuffer", <3 x i64>, 1, 0) %buffer, i32 %index, i32 0, <3 x i64> %0) @@ -73,15 +84,18 @@ define void @storev4i64(<4 x i64> %0, i32 %index) { %buffer = tail call target("dx.RawBuffer", <4 x i64>, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v4i64_1_0t( i32 0, i32 0, i32 1, i32 0, i1 false, ptr null) + ; check we don't modify the code in sm6.3 or later + ; CHECK63: call void @llvm.dx.resource.store.rawbuffer + ; CHECK63-SAME: target("dx.RawBuffer", <4 x i64>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i64> %0) - ; CHECK: [[A:%.*]] = trunc <4 x i64> %0 to <4 x i32> - ; CHECK: [[B:%.*]] = lshr <4 x i64> %0, splat (i64 32) - ; CHECK: [[C:%.*]] = trunc <4 x i64> [[B]] to <4 x i32> - ; CHECK: [[D:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[C]], <8 x i32> - ; CHECK: [[E:%.*]] = shufflevector <8 x i32> [[D]], <8 x i32> poison, <4 x i32> - ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v4i64_1_0t.v4i32(target("dx.RawBuffer", <4 x i64>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i32> [[E]]) - ; CHECK: [[F:%.*]] = shufflevector <8 x i32> [[D]], <8 x i32> poison, <4 x i32> - ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v4i64_1_0t.v4i32(target("dx.RawBuffer", <4 x i64>, 1, 0) [[Buf]], i32 %index, i32 16, <4 x i32> [[F]]) + ; CHECK62: [[A:%.*]] = trunc <4 x i64> %0 to <4 x i32> + ; CHECK62: [[B:%.*]] = lshr <4 x i64> %0, splat (i64 32) + ; CHECK62: [[C:%.*]] = trunc <4 x i64> [[B]] to <4 x i32> + ; CHECK62: [[D:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[C]], <8 x i32> + ; CHECK62: [[E:%.*]] = shufflevector <8 x i32> [[D]], <8 x i32> poison, <4 x i32> + ; CHECK62: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v4i64_1_0t.v4i32(target("dx.RawBuffer", <4 x i64>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i32> [[E]]) + ; CHECK62: [[F:%.*]] = shufflevector <8 x i32> [[D]], <8 x i32> poison, <4 x i32> + ; CHECK62: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v4i64_1_0t.v4i32(target("dx.RawBuffer", <4 x i64>, 1, 0) [[Buf]], i32 %index, i32 16, <4 x i32> [[F]]) call void @llvm.dx.resource.store.rawbuffer( target("dx.RawBuffer", <4 x i64>, 1, 0) %buffer, i32 %index, i32 0, <4 x i64> %0)