From 4fbfa90618ec4902fa46f400a4f734927fb2c441 Mon Sep 17 00:00:00 2001
From: ranapratap55
Date: Fri, 16 May 2025 12:50:09 +0530
Subject: [PATCH 1/8] [WIP][AMDGPU] Support for type inferring image load/store
 builtins for AMDGPU

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def    |   74 ++
 clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp     |  145 ++
 clang/lib/Sema/SemaAMDGPU.cpp                   |  100 ++
 clang/test/CodeGen/builtins-image-load-2d-f32.c |   31 +
 clang/test/CodeGen/builtins-image-load.c        | 1162 +++++++++++++++++
 clang/test/CodeGen/builtins-image-store.c       |  703 ++++++++++
 .../SemaOpenCL/builtins-image-load-param.cl     |  222 ++++
 .../SemaOpenCL/builtins-image-store-param.cl    |  145 ++
 8 files changed, 2582 insertions(+)
 create mode 100644 clang/test/CodeGen/builtins-image-load-2d-f32.c
 create mode 100644 clang/test/CodeGen/builtins-image-load.c
 create mode 100644 clang/test/CodeGen/builtins-image-store.c
 create mode 100644 clang/test/SemaOpenCL/builtins-image-load-param.cl
 create mode 100644 clang/test/SemaOpenCL/builtins-image-store-param.cl

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 3e45c04687a64..970eb68355c08 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -885,5 +885,79 @@ TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_16x8B, "vV2i*V2iIicC*",
 TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_8x16B, "V4iV4i*IicC*", "nc", "gfx1250-insts,wavefrontsize32")
 TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_8x16B, "vV4i*V4iIicC*", "nc", "gfx1250-insts,wavefrontsize32")
 
+//===----------------------------------------------------------------------===//
+// Image builtins
+//===----------------------------------------------------------------------===//
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f32_i32, "V4fiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f16_i32, "V4hiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f32_i32, "V4fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f16_i32, "V4hiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f32_i32, "V4fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f16_i32, "V4hiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_f32_i32, "fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f32_i32, "V4fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f16_i32, "V4hiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_f32_i32, "fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_f32_i32, "fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f32_i32, "V4fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f16_i32, "V4hiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f32_i32, "V4fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f16_i32, "V4hiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f32_i32, "V4fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f16_i32, "V4hiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f32_i32, "vV4fiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f16_i32, "vV4hiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f32_i32, "vV4fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f16_i32, "vV4hiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_f32_i32, "vfiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f32_i32, "vV4fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f16_i32, "vV4hiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_f32_i32, "vfiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f32_i32, "vV4fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f16_i32, "vV4hiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_f32_i32, "vfiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_f32_i32, "vfiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f32_i32, "vV4fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f16_i32, "vV4hiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f32_i32, "vV4fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f16_i32, "vV4hiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f32_i32, "vV4fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f16_i32, "vV4hiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_1d_v4f32_f32, "V4fifV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_1d_v4f16_f32, "V4hifV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_1darray_v4f32_f32, "V4fiffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_1darray_v4f16_f32, "V4hiffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_f32_f32, "fiffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_v4f32_f32, "V4fiffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_v4f16_f32, "V4hiffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_f32_f32, "fifffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_v4f32_f32, "V4fifffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_v4f16_f32, "V4hifffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f32_f32, "V4fifffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f16_f32, "V4hifffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f32_f32, "V4fifffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f16_f32, "V4hifffV8iV4ibii", "nc", "image-insts")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 6596ec06199dc..69e5201ff7e9e 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -181,6 +181,21 @@ static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
   return Call;
 }
 
+llvm::CallInst *EmitAMDGCNImageOverloadedReturnType(clang::CodeGen::CodeGenFunction &CGF,
+                                                    const clang::CallExpr *E,
+                                                    unsigned IntrinsicID,
+                                                    bool IsImageStore) {
+  clang::SmallVector<llvm::Value *> Args;
+  for (unsigned I = 0; I < E->getNumArgs(); ++I)
+    Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));
+
+  llvm::Type *RetTy = CGF.ConvertType(E->getType());
+  if (IsImageStore)
+    RetTy = CGF.VoidTy;
+  llvm::CallInst *Call = CGF.Builder.CreateIntrinsic(RetTy, IntrinsicID, Args);
+  return Call;
+}
+
 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
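For the load and store builtins declared above, the operand order deliberately mirrors the underlying llvm.amdgcn.image.* intrinsics: dmask first (the data vector first for stores), then the coordinate and mip operands, the <8 x i32> texture descriptor, and finally the texfailctrl and cachepolicy operands. A minimal usage sketch, written in the style of the CodeGen tests added later in this patch (the typedefs match the tests; the dmask value 15 and the trailing 0, 0 operands are illustrative placeholders, and the resource descriptor is assumed to be provided by the caller):

/* Illustrative sketch only; not part of the diff. */
typedef int int8 __attribute__((ext_vector_type(8)));
typedef float float4 __attribute__((ext_vector_type(4)));

float4 copy_texel_2d(int x, int y, int8 rsrc) {
  /* dmask = 15 selects all four components; texfailctrl/cachepolicy are 0 here. */
  float4 texel = __builtin_amdgcn_image_load_2d_v4f32_i32(15, x, y, rsrc, 0, 0);
  /* Stores take the data operand first, followed by the same trailing operands. */
  __builtin_amdgcn_image_store_2d_v4f32_i32(texel, 15, x, y, rsrc, 0, 0);
  return texel;
}

With the EmitAMDGCNImageOverloadedReturnType helper above, the load call is expected to lower to the llvm.amdgcn.image.load.2d intrinsic overloaded on the builtin's return type, and the store call to the void-returning llvm.amdgcn.image.store.2d form, as the CHECK lines in the new tests verify.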
static Value *emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, @@ -937,6 +952,136 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateInsertElement(I0, A, 1); } + case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_load_1d, false); + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_load_1darray, false); + case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_load_2d, false); + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_load_2darray, false); + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_load_3d, false); + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_load_cube, false); + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_load_mip_1d, false); + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_load_mip_1darray, false); + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_load_mip_2d, false); + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_load_mip_2darray, false); + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_load_mip_3d, false); + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_load_mip_cube, false); + case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_store_1d, true); + case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32: + 
return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_store_1darray, true); + case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_store_2d, true); + case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_store_2darray, true); + case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_store_3d, true); + case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_store_cube, true); + case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_store_mip_1d, true); + case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_store_mip_1darray, true); + case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_store_mip_2d, true); + case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_store_mip_2darray, true); + case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_store_mip_3d, true); + case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_store_mip_cube, true); + case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_1d, false); + case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_1darray, false); + case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_2d, false); + case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32: + case 
AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_2darray, false); + case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_3d, false); + case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: + return EmitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_cube, false); case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4: case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: { llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8); diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index 3a0c23187d45d..1eaaa838fa3ce 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -111,6 +111,106 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B: case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/true); + case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32: + 
case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: { + StringRef FeatureList(getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID)); + if (!Builtin::evaluateRequiredTargetFeatures( + FeatureList, CallerFeatureMap)){ + Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature) + << FD->getDeclName() << FeatureList; + return false; + } + + unsigned ArgCount = TheCall->getNumArgs() - 1; + llvm::APSInt Result; + + return ((SemaRef.BuiltinConstantArg(TheCall, 0, Result)) || + (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) || + (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result))); + } + case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: { + StringRef FeatureList(getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID)); + if (!Builtin::evaluateRequiredTargetFeatures( + FeatureList, CallerFeatureMap)){ + Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature) + << FD->getDeclName() << FeatureList; + return false; + } + + unsigned ArgCount = TheCall->getNumArgs() - 1; + llvm::APSInt Result; + + return ((SemaRef.BuiltinConstantArg(TheCall, 1, Result)) || + (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) || + (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result))); + } default: return false; } diff --git a/clang/test/CodeGen/builtins-image-load-2d-f32.c b/clang/test/CodeGen/builtins-image-load-2d-f32.c new file mode 100644 index 0000000000000..78dab461c1f38 --- /dev/null +++ b/clang/test/CodeGen/builtins-image-load-2d-f32.c @@ 
-0,0 +1,31 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | FileCheck %s + +#pragma OPENCL EXTENSION cl_khr_fp64:enable + +typedef int v8i __attribute__((ext_vector_type(8))); + +// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d( +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VECI32:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VECI32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VECI32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VECI32_ADDR]] to ptr +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VECI32]], ptr [[VECI32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VECI32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 0, i32 0) +// CHECK-NEXT: ret float [[TMP3]] +// +float test_builtin_image_load_2d(float f32, int i32, v8i veci32) { + + return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, veci32); +} diff --git a/clang/test/CodeGen/builtins-image-load.c b/clang/test/CodeGen/builtins-image-load.c new file mode 100644 index 0000000000000..67548a567723e --- /dev/null +++ b/clang/test/CodeGen/builtins-image-load.c @@ -0,0 +1,1162 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 %s -emit-llvm -o - | FileCheck %s + +typedef int int8 __attribute__((ext_vector_type(8))); +typedef int int4 __attribute__((ext_vector_type(4))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef _Float16 half; +typedef half half4 __attribute__((ext_vector_type(4))); + +// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d( +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: 
store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 12, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 106, i32 103) +// CHECK-NEXT: ret float [[TMP3]] +// +float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, vec8i32, 106, 103); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP3]] +// +float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// 
CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP3]] +// +half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_builtin_image_load_2darray( +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP4]] +// +float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2darray_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], 
align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2darray_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to 
ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP1]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1d_v4f32_i32(100, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP1]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP2]] +// +half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1darray_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr 
[[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP3]] +// +float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1darray_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP3]] +// +half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_3d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr 
[[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_3d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_3d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_cube_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_cube_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_cube_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_cube_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// 
CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP3]] +// +float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(100, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP3]] +// +half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1darray_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// 
CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1darray_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2d( +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: 
[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.mip.2d.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP4]] +// +float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2d_f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: 
[[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2darray( +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.load.mip.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP5]] +// +float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2darray_f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2darray_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef 
[[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2darray_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret 
<4 x half> [[TMP5]] +// +half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_3d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_3d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// 
CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP5]] +// +half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_cube_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_cube_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// 
CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP5]] +// +half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_1d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TMP1]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP3]] +// +float4 test_builtin_amdgcn_image_sample_1d_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) { + return __builtin_amdgcn_image_sample_1d_v4f32_f32(100, f32, vec8i32, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_1d_v4f16_f32( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], 
float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TMP1]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP3]] +// +half4 test_builtin_amdgcn_image_sample_1d_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) { + return __builtin_amdgcn_image_sample_1d_v4f16_f32(100, f32, vec8i32, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_1darray_v4f32_f32( +// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> 
[[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_builtin_amdgcn_image_sample_1darray_v4f32_f32(int i32, float f32, int8 vec8i32, int4 vec4i32) { + return __builtin_amdgcn_image_sample_1darray_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_1darray_v4f16_f32( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_amdgcn_image_sample_1darray_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) { + return __builtin_amdgcn_image_sample_1darray_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_builtin_amdgcn_image_sample_2d_f32_f32( +// 
CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP4]] +// +float test_builtin_amdgcn_image_sample_2d_f32_f32(int i32, float f32, int8 vec8i32, int4 vec4i32) { + return __builtin_amdgcn_image_sample_2d_f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_2d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr 
[[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_builtin_amdgcn_image_sample_2d_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) { + return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_2d_v4f16_f32( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_amdgcn_image_sample_2d_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) { + return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 
0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_builtin_amdgcn_image_sample_2darray_f32_f32( +// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.2darray.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP5]] +// +float test_builtin_amdgcn_image_sample_2darray_f32_f32(int i32, float f32, int8 vec8i32, int4 vec4i32) { + return __builtin_amdgcn_image_sample_2darray_f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_2darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] 
to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_builtin_amdgcn_image_sample_2darray_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) { + return __builtin_amdgcn_image_sample_2darray_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_2darray_v4f16_f32( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = 
call <4 x half> @llvm.amdgcn.image.sample.2darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP5]] +// +half4 test_builtin_amdgcn_image_sample_2darray_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) { + return __builtin_amdgcn_image_sample_2darray_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_3d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_builtin_amdgcn_image_sample_3d_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) { + return __builtin_amdgcn_image_sample_3d_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_3d_v4f16_f32( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) 
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.3d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP5]] +// +half4 test_builtin_amdgcn_image_sample_3d_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) { + return __builtin_amdgcn_image_sample_3d_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_cube_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr 
[[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_builtin_amdgcn_image_sample_cube_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) { + return __builtin_amdgcn_image_sample_cube_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_cube_v4f16_f32( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.cube.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 
false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_builtin_amdgcn_image_sample_cube_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_cube_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
diff --git a/clang/test/CodeGen/builtins-image-store.c b/clang/test/CodeGen/builtins-image-store.c
new file mode 100644
index 0000000000000..cd2b09e074c59
--- /dev/null
+++ b/clang/test/CodeGen/builtins-image-store.c
@@ -0,0 +1,703 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 %s -emit-llvm -o - | FileCheck %s
+
+typedef int int8 __attribute__((ext_vector_type(8)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef _Float16 half;
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_2d(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.f32.i32.v8i32(float [[TMP0]], i32 12, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 106, i32 103)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_2d(float f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_2d_f32_i32(f32, 12, i32, i32, vec8i32, 106, 103);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_2d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, 
ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_2d_1(float4 v4f32, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_2d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_2d_2(half4 v4f16, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray( +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: 
[[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_2darray(float f32, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_2darray_f32_i32(f32, 100, i32, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_2darray_1(float4 v4f32, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, 
ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_2darray_2(half4 v4f16, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_1d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_1d_1(float4 v4f32, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_1d_v4f32_i32(v4f32, 100, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_1d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_1d_2(half4 v4f16, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_1d_v4f16_i32(v4f16, 100, i32, vec8i32, 120, 110); 
+ } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_1darray_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_1darray_1(float4 v4f32, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_1darray_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_1darray_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_1darray_2(half4 v4f16, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_1darray_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_3d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { 
+// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_3d_1(float4 v4f32, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_3d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_3d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_3d_2(half4 v4f16, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_3d_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_cube_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: 
[[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_cube_1(float4 v4f32, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_cube_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_cube_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_cube_2(half4 v4f16, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_cube_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x 
float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_mip_1d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_mip_1d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1darray_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: 
[[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_mip_1darray_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1darray_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_mip_1darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d( +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: 
[[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_mip_2d(float f32, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_mip_2d_f32_i32(f32, 100, i32, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_mip_2d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_mip_2d_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray( +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_mip_2darray(float f32, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_mip_2darray_f32_i32(f32, 100, i32, i32, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: 
[[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_mip_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_mip_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_3d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x 
float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_mip_3d_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_3d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.3d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_mip_3d_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_cube_1( +// CHECK-SAME: <4 x float> noundef 
[[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_mip_cube_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110); + } + +// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_cube_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.cube.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110) +// CHECK-NEXT: ret void +// +void test_builtin_image_store_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) { + + 
__builtin_amdgcn_image_store_mip_cube_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110); + } diff --git a/clang/test/SemaOpenCL/builtins-image-load-param.cl b/clang/test/SemaOpenCL/builtins-image-load-param.cl new file mode 100644 index 0000000000000..249bb9211ab75 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-image-load-param.cl @@ -0,0 +1,222 @@ +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -S -verify=expected -o - %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -S -verify=expected -o - %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -S -verify=GFX94 -o - %s +// REQUIRES: amdgpu-registered-target + +typedef int int8 __attribute__((ext_vector_type(8))); +typedef int int4 __attribute__((ext_vector_type(4))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef half half4 __attribute__((ext_vector_type(4))); + +float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, i32, vec8i32, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_f32_i32' must be a constant integer}} +} +float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f16_i32' must be a constant integer}} +} + + +float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_f32_i32' must be a constant integer}} +} +float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1d_v4f32_i32(i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to 
'__builtin_amdgcn_image_load_1darray_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_3d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_cube_v4f32_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_cube_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f16_i32' must be a constant integer}} +} + +float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2d_f32_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_f32_i32' must be a constant integer}} +} +float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f16_i32' must be a constant integer}} +} + +float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2darray_f32_i32(i32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_f32_i32' must be a constant integer}} +} +float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, 
int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(i32, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f16_i32' must be a constant integer}} +} + +float test_builtin_image_load_2d_gfx(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, vec8i32, 106, 103); //GFX94-error{{'__builtin_amdgcn_image_load_2d_f32_i32' needs target feature image-insts}} +} +float4 test_builtin_image_load_2d_gfx_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_load_2d_v4f32_i32' needs target feature image-insts}} +} +half4 test_builtin_image_load_2d_gfx_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_load_2d_v4f16_i32' needs target feature image-insts}} +} + +float test_builtin_image_sample_2d(float f32, int i32, int8 vec8i32, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2d_f32_f32(i32, f32, f32, vec8i32, vec4i32, 0, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_f32_f32' must be a constant integer}} +} +float4 test_builtin_image_sample_2d_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_v4f32_f32' must be a constant integer}} +} +half4 test_builtin_image_sample_2d_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_v4f16_f32' must be a constant integer}} +} + +float test_builtin_image_sample_2darray(float f32, int i32, int8 vec8i32, int4 vec4i32) { + + return 
__builtin_amdgcn_image_sample_2darray_f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_f32_f32' must be a constant integer}} +} +float4 test_builtin_image_sample_2darray_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2darray_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_v4f32_f32' must be a constant integer}} +} +half4 test_builtin_image_sample_2darray_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2darray_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_v4f16_f32' must be a constant integer}} +} + +float4 test_builtin_image_sample_1d_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_1d_v4f32_f32(i32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_1d_v4f32_f32' must be a constant integer}} +} +half4 test_builtin_image_sample_1d_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_1d_v4f16_f32(100, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_1d_v4f16_f32' must be a constant integer}} +} + +float4 test_builtin_image_sample_1darray_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_1darray_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_1darray_v4f32_f32' must be a constant integer}} +} +half4 test_builtin_image_sample_1darray_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_1darray_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_1darray_v4f16_f32' must be a constant integer}} +} + +float4 test_builtin_image_sample_3d_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_3d_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_3d_v4f32_f32' must be a constant integer}} +} +half4 test_builtin_image_sample_3d_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_3d_v4f16_f32(i32, f32, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_3d_v4f16_f32' must be a constant integer}} +} + +float4 test_builtin_image_sample_cube_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_cube_v4f32_f32(i32, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_cube_v4f32_f32' must be a constant integer}} +} +half4 test_builtin_image_sample_cube_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_cube_v4f16_f32(i32, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_cube_v4f16_f32' must be a constant integer}} +} + +float test_builtin_image_sample_2d_gfx(float f32, int i32, int8 vec8i32, int4 vec4i32) { + + return 
__builtin_amdgcn_image_sample_2d_f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_sample_2d_f32_f32' needs target feature image-insts}} +} +float4 test_builtin_image_sample_2d_gfx_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_sample_2d_v4f32_f32' needs target feature image-insts}} +} +half4 test_builtin_image_sample_2d_gfx_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_sample_2d_v4f16_f32' needs target feature image-insts}} +} \ No newline at end of file diff --git a/clang/test/SemaOpenCL/builtins-image-store-param.cl b/clang/test/SemaOpenCL/builtins-image-store-param.cl new file mode 100644 index 0000000000000..f84df77171098 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-image-store-param.cl @@ -0,0 +1,145 @@ +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -S -verify=expected -o - %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -S -verify=GFX94 -o - %s +// REQUIRES: amdgpu-registered-target + +typedef int int8 __attribute__((ext_vector_type(8))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef half half4 __attribute__((ext_vector_type(4))); + + +void test_builtin_image_store_2d(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_2d_f32_i32(f32, i32, i32, i32, vec8i32, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_f32_i32' must be a constant integer}} +} +void test_builtin_image_store_2d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_2d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_2darray(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_2darray_f32_i32(f32, 100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_f32_i32' must be a constant integer}} +} +void test_builtin_image_store_2darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_2darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_1d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_1d_v4f32_i32(v4f32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_1d_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_1d_2(half4 v4f16, int i32, int8 vec8i32) { + + return 
__builtin_amdgcn_image_store_1d_v4f16_i32(v4f16, 100, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_1d_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_1darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_1darray_v4f32_i32(v4f32, 100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_1darray_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_1darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_1darray_v4f16_i32(v4f16, 100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_1darray_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_3d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_3d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_3d_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_3d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_3d_v4f16_i32(v4f16, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_3d_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_cube_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_cube_v4f32_i32(v4f32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_cube_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_cube_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_cube_v4f16_i32(v4f16, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_cube_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_mip_1d_v4f32_i32(v4f32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1d_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_mip_1d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1d_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_mip_1darray_v4f32_i32(v4f32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1darray_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_mip_1darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1darray_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_mip_2d(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_mip_2d_f32_i32(f32, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_f32_i32' must be a constant integer}} +} +void test_builtin_image_store_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_mip_2d_v4f32_i32(v4f32, 100, i32, i32, i32, 
vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_mip_2d_v4f16_i32(v4f16, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_mip_2darray(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_mip_2darray_f32_i32(f32, i32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_f32_i32' must be a constant integer}} +} +void test_builtin_image_store_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_mip_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_mip_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_mip_3d_v4f32_i32(v4f32, i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_3d_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_mip_3d_v4f16_i32(v4f16, i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_3d_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_mip_cube_v4f32_i32(v4f32, i32, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_cube_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_store_mip_cube_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_cube_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_2d_gfx(float f32, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_2d_f32_i32(f32, 12, i32, i32, vec8i32, 106, 103); //GFX94-error{{'__builtin_amdgcn_image_store_2d_f32_i32' needs target feature image-insts}} +} +void test_builtin_image_store_2d_gfx_1(float4 v4f32, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_store_2d_v4f32_i32' needs target feature image-insts}} + } + void test_builtin_image_store_2d_gfx_2(half4 v4f16, int i32, int8 vec8i32) { + + __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_store_2d_v4f16_i32' needs target feature image-insts}} + } From 90065eeb87fb8f5bb831eb4f9659844afc4a4b44 Mon Sep 17 00:00:00 2001 From: ranapratap55 Date: Wed, 18 Jun 2025 10:55:22 +0530 Subject: [PATCH 2/8] [AMDGPU] Adds builtins for image load and sema checking for 
image load --- clang/include/clang/Sema/SemaAMDGPU.h | 2 + clang/lib/Sema/SemaAMDGPU.cpp | 32 + .../test/CodeGen/builtins-image-load-2d-f32.c | 715 +++++++++++++++++- .../builtins-image-load-2d-f32-param.cl | 132 ++++ 4 files changed, 869 insertions(+), 12 deletions(-) create mode 100644 clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl diff --git a/clang/include/clang/Sema/SemaAMDGPU.h b/clang/include/clang/Sema/SemaAMDGPU.h index bac812a9d4fcf..43dea8445ef3d 100644 --- a/clang/include/clang/Sema/SemaAMDGPU.h +++ b/clang/include/clang/Sema/SemaAMDGPU.h @@ -31,6 +31,8 @@ class SemaAMDGPU : public SemaBase { bool checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs, unsigned NumDataArgs); + bool checkImageImmArgFunctionCall(CallExpr *TheCall, unsigned ArgCount); + /// Create an AMDGPUWavesPerEUAttr attribute. AMDGPUFlatWorkGroupSizeAttr * CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI, Expr *Min, diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index 1eaaa838fa3ce..666645628decc 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -211,6 +211,38 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) || (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result))); } + case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: { + unsigned ArgCount = TheCall->getNumArgs() - 1; + + return checkImageImmArgFunctionCall(TheCall, ArgCount); + } default: return false; } diff --git a/clang/test/CodeGen/builtins-image-load-2d-f32.c b/clang/test/CodeGen/builtins-image-load-2d-f32.c index 78dab461c1f38..aee97af37aaf0 100644 --- a/clang/test/CodeGen/builtins-image-load-2d-f32.c +++ b/clang/test/CodeGen/builtins-image-load-2d-f32.c @@ -1,31 +1,722 @@ // NOTE: Assertions have been 
autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa %s -emit-llvm -o - | FileCheck %s -#pragma OPENCL EXTENSION cl_khr_fp64:enable - -typedef int v8i __attribute__((ext_vector_type(8))); +typedef int int8 __attribute__((ext_vector_type(8))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef _Float16 half; +typedef half half4 __attribute__((ext_vector_type(4))); // CHECK-LABEL: define dso_local float @test_builtin_image_load_2d( -// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VECI32:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VECI32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VECI32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VECI32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VECI32]], ptr [[VECI32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VECI32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 0, i32 0) +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 12, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 106, i32 103) // CHECK-NEXT: ret float [[TMP3]] // -float test_builtin_image_load_2d(float f32, int i32, v8i veci32) { +float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, vec8i32, 106, 103); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: 
[[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP3]] +// +float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP3]] +// +half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_builtin_image_load_2darray( +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to 
ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP4]] +// +float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2darray_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2darray_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: 
[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP3]] +// +float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1d_v4f32_i32(100, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = 
alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP3]] +// +half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1darray_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1darray_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, 
addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_3d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_3d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) 
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_3d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_cube_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_cube_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_cube_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x 
i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_cube_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(100, i32, i32, vec8i32, 120, 110); +} +// 
CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1darray_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 
test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1darray_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2d( +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call float 
@llvm.amdgcn.image.load.mip.2d.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP4]] +// +float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2d_f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2darray( +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.load.mip.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP5]] +// +float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2darray_f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2darray_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr 
[[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2darray_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP5]] +// +half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_3d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr 
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_3d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP5]] +// +half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_cube_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, 
align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_cube_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP5]] +// +half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) { - return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, veci32); + return 
__builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); } diff --git a/clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl b/clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl new file mode 100644 index 0000000000000..7b5aab4011da9 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl @@ -0,0 +1,132 @@ +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -S -verify=expected -o - %s +// REQUIRES: amdgpu-registered-target + +typedef int int8 __attribute__((ext_vector_type(8))); +typedef float float4 __attribute__((ext_vector_type(4))); +//typedef _Float16 half; +typedef half half4 __attribute__((ext_vector_type(4))); + + +float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, i32, vec8i32, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_f32_i32' must be a constant integer}} +} +float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f16_i32' must be a constant integer}} +} + +float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_f32_i32' must be a constant integer}} +} +float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1d_v4f32_i32(i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) { + + return 
__builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_3d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_cube_v4f32_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_cube_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f16_i32' must be a constant integer}} +} + +float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2d_f32_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_f32_i32' must be a constant integer}} +} +float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f16_i32' must be a constant integer}} +} + +float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2darray_f32_i32(i32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_f32_i32' must be a constant integer}} +} +float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to 
'__builtin_amdgcn_image_load_mip_2darray_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(i32, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f16_i32' must be a constant integer}} +} From 21c682925d8c7d6ff2e5b286fc6ee6ef09f5e016 Mon Sep 17 00:00:00 2001 From: ranapratap55 Date: Thu, 26 Jun 2025 14:36:33 +0530 Subject: [PATCH 3/8] [AMDGPU] Adds builtins for image store and sema checking for image store --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 29 + clang/include/clang/Sema/SemaAMDGPU.h | 2 - clang/lib/Sema/SemaAMDGPU.cpp | 50 +- .../test/CodeGen/builtins-image-load-2d-f32.c | 722 ------------------ clang/test/CodeGen/builtins-image-load.c | 15 - .../builtins-image-load-2d-f32-param.cl | 132 ---- .../SemaOpenCL/builtins-image-load-param.cl | 3 +- 7 files changed, 78 insertions(+), 875 deletions(-) delete mode 100644 clang/test/CodeGen/builtins-image-load-2d-f32.c delete mode 100644 clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 970eb68355c08..7202a69c1f2cf 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -959,5 +959,34 @@ TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f16_f32, "V4hifffV8iV4ibii", " TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f32_f32, "V4fifffV8iV4ibii", "nc", "image-insts") TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f16_f32, "V4hifffV8iV4ibii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f32_i32, "vV4fiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f16_i32, "vV4hiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f32_i32, "vV4fiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f16_i32, "vV4hiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_f32_i32, "vfiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f32_i32, "vV4fiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f16_i32, "vV4hiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_f32_i32, 
"vfiiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f32_i32, "vV4fiiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f16_i32, "vV4hiiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f32_i32, "vV4fiiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f16_i32, "vV4hiiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f32_i32, "vV4fiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f16_i32, "vV4hiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_f32_i32, "vfiiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f32_i32, "vV4fiiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f16_i32, "vV4hiiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_f32_i32, "vfiiiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f32_i32, "vV4fiiiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f16_i32, "vV4hiiiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f32_i32, "vV4fiiiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f16_i32, "vV4hiiiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f32_i32, "vV4fiiiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f16_i32, "vV4hiiiiiV8iii", "nc", "") + #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/include/clang/Sema/SemaAMDGPU.h b/clang/include/clang/Sema/SemaAMDGPU.h index 43dea8445ef3d..bac812a9d4fcf 100644 --- a/clang/include/clang/Sema/SemaAMDGPU.h +++ b/clang/include/clang/Sema/SemaAMDGPU.h @@ -31,8 +31,6 @@ class SemaAMDGPU : public SemaBase { bool checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs, unsigned NumDataArgs); - bool checkImageImmArgFunctionCall(CallExpr *TheCall, unsigned ArgCount); - /// Create an AMDGPUWavesPerEUAttr attribute. AMDGPUFlatWorkGroupSizeAttr * CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI, Expr *Min, diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index 666645628decc..2f2d6ec94de23 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -240,8 +240,54 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: { unsigned ArgCount = TheCall->getNumArgs() - 1; - - return checkImageImmArgFunctionCall(TheCall, ArgCount); + llvm::APSInt Result; + bool isImmArg = + (!(SemaRef.BuiltinConstantArg(TheCall, 0, Result)) && + !(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) && + !(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result))) + ? 
false + : true; + + return isImmArg; + } + case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: { + unsigned ArgCount = TheCall->getNumArgs() - 1; + llvm::APSInt Result; + bool isImmArg = + (!(SemaRef.BuiltinConstantArg(TheCall, 1, Result)) && + !(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) && + !(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result))) + ? 
false + : true; + + return isImmArg; } default: return false; diff --git a/clang/test/CodeGen/builtins-image-load-2d-f32.c b/clang/test/CodeGen/builtins-image-load-2d-f32.c deleted file mode 100644 index aee97af37aaf0..0000000000000 --- a/clang/test/CodeGen/builtins-image-load-2d-f32.c +++ /dev/null @@ -1,722 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa %s -emit-llvm -o - | FileCheck %s - -typedef int int8 __attribute__((ext_vector_type(8))); -typedef float float4 __attribute__((ext_vector_type(4))); -typedef _Float16 half; -typedef half half4 __attribute__((ext_vector_type(4))); - -// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d( -// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 12, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 106, i32 103) -// CHECK-NEXT: ret float [[TMP3]] -// -float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, vec8i32, 106, 103); -} - -// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2d_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// 
CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x float> [[TMP3]] -// -float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110); -} -// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2d_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x half> [[TMP3]] -// -half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, 110); -} - -// CHECK-LABEL: define dso_local float @test_builtin_image_load_2darray( -// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: 
[[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) -// CHECK-NEXT: ret float [[TMP4]] -// -float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, 120, 110); -} -// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2darray_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x float> [[TMP4]] -// -float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); -} -// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2darray_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: 
store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x half> [[TMP4]] -// -half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); -} - -// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1d_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP2]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x float> [[TMP3]] -// -float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_1d_v4f32_i32(100, i32, vec8i32, 120, 110); -} -// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1d_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 
[[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP2]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x half> [[TMP3]] -// -half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, 110); -} - -// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1darray_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x float> [[TMP4]] -// -float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, 120, 110); -} -// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1darray_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x half> [[V4F16]], ptr 
[[V4F16_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x half> [[TMP4]] -// -half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, 120, 110); -} - -// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_3d_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x float> [[TMP4]] -// -float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); -} -// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_3d_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: 
[[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x half> [[TMP4]] -// -half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_3d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); -} - -// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_cube_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x float> [[TMP4]] -// -float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_cube_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); -} -// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_cube_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast 
ptr addrspace(5) [[V4F16_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x half> [[TMP4]] -// -half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_cube_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); -} - -// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1d_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x float> [[TMP4]] -// -float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(100, i32, i32, vec8i32, 120, 110); -} -// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1d_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) 
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x half> [[TMP4]] -// -half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, 110); -} - -// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1darray_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x float> [[TMP4]] -// -float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); -} -// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1darray_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x 
half>, align 8, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x half> [[TMP4]] -// -half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); -} - -// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2d( -// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.mip.2d.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) -// CHECK-NEXT: ret float [[TMP4]] -// -float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_2d_f32_i32(100, i32, i32, i32, vec8i32, 120, 110); -} -// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2d_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// 
CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x float> [[TMP4]] -// -float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); -} -// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2d_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x half> [[TMP4]] -// -half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); -} - -// CHECK-LABEL: define dso_local float 
@test_builtin_image_load_mip_2darray( -// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.load.mip.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) -// CHECK-NEXT: ret float [[TMP5]] -// -float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_2darray_f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); -} -// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2darray_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 
[[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x float> [[TMP5]] -// -float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); -} -// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2darray_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x half> [[TMP5]] -// -half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); -} - -// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_3d_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x float> [[TMP5]] -// -float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); -} -// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_3d_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x half> [[TMP5]] -// -half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); -} - -// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_cube_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] 
to ptr -// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x float> [[TMP5]] -// -float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); -} -// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_cube_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) -// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr -// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr -// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) -// CHECK-NEXT: ret <4 x half> [[TMP5]] -// -half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); -} diff --git a/clang/test/CodeGen/builtins-image-load.c b/clang/test/CodeGen/builtins-image-load.c index 67548a567723e..3c3e1ec35806f 100644 --- a/clang/test/CodeGen/builtins-image-load.c +++ b/clang/test/CodeGen/builtins-image-load.c @@ -56,7 +56,6 @@ float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110); } - // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2d_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 
noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -107,7 +106,6 @@ float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, 120, 110); } - // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2darray_1( // CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -133,7 +131,6 @@ float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); } - // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2darray_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -183,7 +180,6 @@ float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_1d_v4f32_i32(100, i32, vec8i32, 120, 110); } - // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1d_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -232,7 +228,6 @@ float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, 120, 110); } - // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1darray_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -283,7 +278,6 @@ float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); } - // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_3d_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -335,7 +329,6 @@ float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_cube_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); } - // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_cube_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -386,7 +379,6 @@ float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(100, i32, i32, vec8i32, 120, 110); } - // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1d_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -437,7 +429,6 @@ float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32 return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); } - // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1darray_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -489,7 +480,6 @@ float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) 
{ return __builtin_amdgcn_image_load_mip_2d_f32_i32(100, i32, i32, i32, vec8i32, 120, 110); } - // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2d_1( // CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -515,7 +505,6 @@ float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); } - // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2d_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -568,7 +557,6 @@ float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_mip_2darray_f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); } - // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2darray_1( // CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -595,7 +583,6 @@ float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32 return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); } - // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2darray_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -649,7 +636,6 @@ float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); } - // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_3d_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -703,7 +689,6 @@ float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); } - // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_cube_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] diff --git a/clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl b/clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl deleted file mode 100644 index 7b5aab4011da9..0000000000000 --- a/clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl +++ /dev/null @@ -1,132 +0,0 @@ -// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -S -verify=expected -o - %s -// REQUIRES: amdgpu-registered-target - -typedef int int8 __attribute__((ext_vector_type(8))); -typedef float float4 __attribute__((ext_vector_type(4))); -//typedef _Float16 half; -typedef half half4 __attribute__((ext_vector_type(4))); - - -float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, i32, vec8i32, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_f32_i32' must be a constant integer}} -} -float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, i32, 110); 
//expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f16_i32' must be a constant integer}} -} - -float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_f32_i32' must be a constant integer}} -} -float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f16_i32' must be a constant integer}} -} - -float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_1d_v4f32_i32(i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f16_i32' must be a constant integer}} -} - -float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f16_i32' must be a constant integer}} -} - -float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_3d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f16_i32' must be a constant integer}} -} - -float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_cube_v4f32_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_cube_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f16_i32' must be a constant integer}} -} - -float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) { - - return 
__builtin_amdgcn_image_load_mip_1d_v4f32_i32(i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f16_i32' must be a constant integer}} -} - -float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f16_i32' must be a constant integer}} -} - -float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_2d_f32_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_f32_i32' must be a constant integer}} -} -float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f16_i32' must be a constant integer}} -} - -float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_2darray_f32_i32(i32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_f32_i32' must be a constant integer}} -} -float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f16_i32' must be a constant integer}} -} - -float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f16_i32' must be a constant integer}} -} - -float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(i32, i32, i32, 
i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f16_i32' must be a constant integer}} -} diff --git a/clang/test/SemaOpenCL/builtins-image-load-param.cl b/clang/test/SemaOpenCL/builtins-image-load-param.cl index 249bb9211ab75..03afbde19a359 100644 --- a/clang/test/SemaOpenCL/builtins-image-load-param.cl +++ b/clang/test/SemaOpenCL/builtins-image-load-param.cl @@ -21,7 +21,6 @@ half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f16_i32' must be a constant integer}} } - float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_f32_i32' must be a constant integer}} @@ -219,4 +218,4 @@ float4 test_builtin_image_sample_2d_gfx_1(float4 v4f32, float f32, int i32, int8 half4 test_builtin_image_sample_2d_gfx_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) { return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_sample_2d_v4f16_f32' needs target feature image-insts}} -} \ No newline at end of file +} From 78caf4feae75859bcdb12d6f187a1e06c920a7dc Mon Sep 17 00:00:00 2001 From: ranapratap55 Date: Tue, 12 Aug 2025 13:19:37 +0530 Subject: [PATCH 4/8] [AMDGPU] Adds EmitAMDGCNImageOverloadedReturnType for amdgcn_image_load/store and adds 'image-insts' feature --- clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 3 + clang/lib/Sema/SemaAMDGPU.cpp | 79 +-------------------- clang/test/CodeGen/builtins-image-load.c | 15 ++++ 3 files changed, 19 insertions(+), 78 deletions(-) diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 69e5201ff7e9e..47a975ca78df4 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -10,9 +10,12 @@ // //===----------------------------------------------------------------------===// +#include "CodeGenFunction.h" #include "CGBuiltin.h" #include "clang/Basic/TargetBuiltins.h" +#include "clang/Frontend/FrontendDiagnostic.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" #include "llvm/IR/MemoryModelRelaxationAnnotations.h" diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index 2f2d6ec94de23..d08778f2d1f79 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -13,6 +13,7 @@ #include "clang/Sema/SemaAMDGPU.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Basic/TargetBuiltins.h" +#include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Sema/Ownership.h" #include "clang/Sema/Sema.h" #include "llvm/Support/AMDGPUAddrSpace.h" @@ -211,84 +212,6 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) || (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result))); } - 
case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: { - unsigned ArgCount = TheCall->getNumArgs() - 1; - llvm::APSInt Result; - bool isImmArg = - (!(SemaRef.BuiltinConstantArg(TheCall, 0, Result)) && - !(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) && - !(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result))) - ? 
false - : true; - - return isImmArg; - } - case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: { - unsigned ArgCount = TheCall->getNumArgs() - 1; - llvm::APSInt Result; - bool isImmArg = - (!(SemaRef.BuiltinConstantArg(TheCall, 1, Result)) && - !(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) && - !(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result))) - ? 
false - : true; - - return isImmArg; - } default: return false; } diff --git a/clang/test/CodeGen/builtins-image-load.c b/clang/test/CodeGen/builtins-image-load.c index 3c3e1ec35806f..67548a567723e 100644 --- a/clang/test/CodeGen/builtins-image-load.c +++ b/clang/test/CodeGen/builtins-image-load.c @@ -56,6 +56,7 @@ float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110); } + // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2d_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -106,6 +107,7 @@ float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, 120, 110); } + // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2darray_1( // CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -131,6 +133,7 @@ float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); } + // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2darray_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -180,6 +183,7 @@ float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_1d_v4f32_i32(100, i32, vec8i32, 120, 110); } + // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1d_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -228,6 +232,7 @@ float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, 120, 110); } + // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1darray_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -278,6 +283,7 @@ float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); } + // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_3d_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -329,6 +335,7 @@ float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_cube_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); } + // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_cube_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -379,6 +386,7 @@ float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(100, i32, i32, vec8i32, 120, 110); } + // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1d_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef 
[[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -429,6 +437,7 @@ float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32 return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); } + // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1darray_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -480,6 +489,7 @@ float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_mip_2d_f32_i32(100, i32, i32, i32, vec8i32, 120, 110); } + // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2d_1( // CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -505,6 +515,7 @@ float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); } + // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2d_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -557,6 +568,7 @@ float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_mip_2darray_f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); } + // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2darray_1( // CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -583,6 +595,7 @@ float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32 return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); } + // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2darray_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -636,6 +649,7 @@ float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); } + // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_3d_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -689,6 +703,7 @@ float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); } + // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_cube_2( // CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] From f27e89abd9d70d8af6eec2b8571a78843700ae17 Mon Sep 17 00:00:00 2001 From: ranapratap55 Date: Tue, 9 Sep 2025 10:37:23 +0530 Subject: [PATCH 5/8] [AMDGPU] Extends builtin support for amdgcn_image_sample and adds sema checking tests --- clang/test/SemaOpenCL/builtins-image-load-param.cl | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/test/SemaOpenCL/builtins-image-load-param.cl 
b/clang/test/SemaOpenCL/builtins-image-load-param.cl index 03afbde19a359..0da4831e847f7 100644 --- a/clang/test/SemaOpenCL/builtins-image-load-param.cl +++ b/clang/test/SemaOpenCL/builtins-image-load-param.cl @@ -21,6 +21,7 @@ half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f16_i32' must be a constant integer}} } + float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) { return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_f32_i32' must be a constant integer}} From 46c833f037c2973c2e15437973e6b98f6e8a8da8 Mon Sep 17 00:00:00 2001 From: ranapratap55 Date: Tue, 7 Oct 2025 11:26:08 +0530 Subject: [PATCH 6/8] Using image desc as opaque ptr type for image load/store/sample --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 169 ++--- clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 74 +- clang/test/CodeGen/builtins-image-load.c | 715 +++++++++--------- clang/test/CodeGen/builtins-image-store.c | 477 ++++++------ .../builtins-image-load-param-gfx1100.cl | 194 +++++ .../builtins-image-load-param-gfx942.cl | 219 ++++++ .../SemaOpenCL/builtins-image-load-param.cl | 222 ------ .../builtins-image-store-param-gfx1100.cl | 129 ++++ .../builtins-image-store-param-gfx942.cl | 129 ++++ .../SemaOpenCL/builtins-image-store-param.cl | 145 ---- 10 files changed, 1443 insertions(+), 1030 deletions(-) create mode 100644 clang/test/SemaOpenCL/builtins-image-load-param-gfx1100.cl create mode 100644 clang/test/SemaOpenCL/builtins-image-load-param-gfx942.cl delete mode 100644 clang/test/SemaOpenCL/builtins-image-load-param.cl create mode 100644 clang/test/SemaOpenCL/builtins-image-store-param-gfx1100.cl create mode 100644 clang/test/SemaOpenCL/builtins-image-store-param-gfx942.cl delete mode 100644 clang/test/SemaOpenCL/builtins-image-store-param.cl diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 7202a69c1f2cf..8428fa97fe445 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -888,105 +888,76 @@ TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_8x16B, "vV4i*V4iIicC*", //===----------------------------------------------------------------------===// // Image builtins //===----------------------------------------------------------------------===// -TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f32_i32, "V4fiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f16_i32, "V4hiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f32_i32, "V4fiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f16_i32, "V4hiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f32_i32, "V4fiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f16_i32, "V4hiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_f32_i32, "fiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts") 
-TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f32_i32, "V4fiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f16_i32, "V4hiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_f32_i32, "fiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_f32_i32, "fiiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f32_i32, "V4fiiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f16_i32, "V4hiiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f32_i32, "V4fiiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f16_i32, "V4hiiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f32_i32, "V4fiiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f16_i32, "V4hiiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f32_i32, "vV4fiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f16_i32, "vV4hiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f32_i32, "vV4fiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f16_i32, "vV4hiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_f32_i32, "vfiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f32_i32, "vV4fiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f16_i32, "vV4hiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_f32_i32, "vfiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f32_i32, "vV4fiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f16_i32, "vV4hiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f16_i32, 
"vV4hiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_f32_i32, "vfiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_f32_i32, "vfiiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f32_i32, "vV4fiiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f16_i32, "vV4hiiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f32_i32, "vV4fiiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f16_i32, "vV4hiiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f32_i32, "vV4fiiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f16_i32, "vV4hiiiiiV8iii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_sample_1d_v4f32_f32, "V4fifV8iV4ibii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_sample_1d_v4f16_f32, "V4hifV8iV4ibii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_sample_1darray_v4f32_f32, "V4fiffV8iV4ibii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_sample_1darray_v4f16_f32, "V4hiffV8iV4ibii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_f32_f32, "fiffV8iV4ibii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_v4f32_f32, "V4fiffV8iV4ibii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_v4f16_f32, "V4hiffV8iV4ibii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_f32_f32, "fifffV8iV4ibii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_v4f32_f32, "V4fifffV8iV4ibii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_v4f16_f32, "V4hifffV8iV4ibii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f32_f32, "V4fifffV8iV4ibii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f16_f32, "V4hifffV8iV4ibii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f32_f32, "V4fifffV8iV4ibii", "nc", "image-insts") -TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f16_f32, "V4hifffV8iV4ibii", "nc", "image-insts") - -TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f32_i32, "vV4fiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f16_i32, "vV4hiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f32_i32, "vV4fiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f16_i32, "vV4hiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_f32_i32, "vfiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f32_i32, "vV4fiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f16_i32, "vV4hiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_f32_i32, "vfiiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f32_i32, "vV4fiiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f16_i32, "vV4hiiiiV8iii", "nc", "") 
-TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f32_i32, "vV4fiiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f16_i32, "vV4hiiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f32_i32, "vV4fiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f16_i32, "vV4hiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_f32_i32, "vfiiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f32_i32, "vV4fiiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f16_i32, "vV4hiiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_f32_i32, "vfiiiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f32_i32, "vV4fiiiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f16_i32, "vV4hiiiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f32_i32, "vV4fiiiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f16_i32, "vV4hiiiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f32_i32, "vV4fiiiiiV8iii", "nc", "") -TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f16_i32, "vV4hiiiiiV8iii", "nc", "") +TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f32_i32, "V4fiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f16_i32, "V4hiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f32_i32, "V4fiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f16_i32, "V4hiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f32_i32, "V4fiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f16_i32, "V4hiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_f32_i32, "fiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f32_i32, "V4fiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f16_i32, "V4hiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f32_i32, "V4fiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f16_i32, "V4hiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f32_i32, "V4fiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f16_i32, "V4hiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f32_i32, "V4fiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f16_i32, "V4hiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f32_i32, "V4fiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f16_i32, "V4hiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_f32_i32, "fiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f32_i32, "V4fiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f16_i32, "V4hiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_f32_i32, 
"fiiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f32_i32, "V4fiiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f16_i32, "V4hiiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f32_i32, "V4fiiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f16_i32, "V4hiiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f32_i32, "V4fiiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f16_i32, "V4hiiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f32_i32, "vV4fiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f16_i32, "vV4hiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f32_i32, "vV4fiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f16_i32, "vV4hiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_f32_i32, "vfiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f32_i32, "vV4fiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f16_i32, "vV4hiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_f32_i32, "vfiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f32_i32, "vV4fiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f16_i32, "vV4hiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f32_i32, "vV4fiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f16_i32, "vV4hiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f32_i32, "vV4fiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f16_i32, "vV4hiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f32_i32, "vV4fiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f16_i32, "vV4hiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f32_i32, "vV4fiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f16_i32, "vV4hiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_f32_i32, "vfiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f32_i32, "vV4fiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f16_i32, "vV4hiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_f32_i32, "vfiiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f32_i32, "vV4fiiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f16_i32, "vV4hiiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f32_i32, "vV4fiiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f16_i32, "vV4hiiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f32_i32, "vV4fiiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f16_i32, "vV4hiiiiiQtii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_1d_v4f32_f32, "V4fifQtV4ibii", "nc", "image-insts") 
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_1d_v4f16_f32, "V4hifQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_1darray_v4f32_f32, "V4fiffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_1darray_v4f16_f32, "V4hiffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_f32_f32, "fiffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_v4f32_f32, "V4fiffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_v4f16_f32, "V4hiffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_f32_f32, "fifffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_v4f32_f32, "V4fifffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_v4f16_f32, "V4hifffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f32_f32, "V4fifffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f16_f32, "V4hifffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f32_f32, "V4fifffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f16_f32, "V4hifffQtV4ibii", "nc", "image-insts")
 
 #undef BUILTIN
 #undef TARGET_BUILTIN
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 47a975ca78df4..84119b7b4a768 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -184,13 +184,83 @@ static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
   return Call;
 }
 
+// Image sample builtins carry a sampler descriptor and three trailing scalar
+// operands after the texture handle, so their texture argument sits at a
+// different offset from the end of the call than it does for the image
+// load/store builtins.
+static bool IsImageSampleBuiltIn(unsigned BuiltinID) {
+  switch (BuiltinID) {
+  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
+  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
+  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
+  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
+  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
+  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
+  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
+  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
+  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
+  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
+  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
+  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
+  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
+  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// Normalize the texture descriptor argument to the <8 x i32> rsrc vector the
+// image intrinsics expect. It may already be that vector, or it may arrive as
+// an i32 handle, a pointer to the descriptor, or some other 256-bit value.
+static llvm::Value *LoadTextureDescPtorAsVec8I32(CodeGenFunction &CGF,
+                                                 llvm::Value *RsrcPtr) {
+  auto &B = CGF.Builder;
+  auto *VecTy = llvm::FixedVectorType::get(B.getInt32Ty(), 8);
+
+  if (RsrcPtr->getType() == VecTy)
+    return RsrcPtr;
+
+  if (RsrcPtr->getType()->isIntegerTy(32)) {
+    unsigned AS = 8;
+    llvm::PointerType *VecPtrTy = llvm::PointerType::get(VecTy, AS);
+    llvm::Value *Ptr =
+        B.CreateIntToPtr(RsrcPtr, VecPtrTy, "tex.rsrc.from.int");
+    return B.CreateAlignedLoad(VecTy, Ptr, llvm::Align(32), "tex.rsrc.val");
+  }
+
+  if (RsrcPtr->getType()->isPointerTy()) {
+    unsigned AS = RsrcPtr->getType()->getPointerAddressSpace();
+    auto *VecPtrTy = llvm::PointerType::get(VecTy, AS);
+    llvm::Value *Typed =
+        B.CreateBitCast(RsrcPtr, VecPtrTy, "tex.rsrc.typed");
+    return B.CreateAlignedLoad(VecTy, Typed, llvm::Align(32), "tex.rsrc.val");
+  }
+
+  const auto &DL = CGF.CGM.getDataLayout();
+  if (DL.getTypeSizeInBits(RsrcPtr->getType()) == 256)
+    return B.CreateBitCast(RsrcPtr, VecTy, "tex.rsrc.val");
+
+  RsrcPtr->getType()->print(llvm::errs());
+  llvm::report_fatal_error(": Unexpected texture resource argument form");
+}
+
+// Index of the texture descriptor argument, counted back from the end of the
+// call: sample builtins have four operands after it (the sampler vector and
+// three scalars), load/store builtins have two. Returns (unsigned)-1 if the
+// call is too short to hold a descriptor at all.
+static unsigned GetTextureDescIndex(unsigned BuiltinID, const CallExpr *E) {
+  unsigned N = E->getNumArgs();
+  if (IsImageSampleBuiltIn(BuiltinID)) {
+    if (N < 5)
+      return (unsigned)-1;
+    return N - 5;
+  }
+
+  if (N < 3)
+    return (unsigned)-1;
+  return N - 3;
+}
+
 llvm::CallInst *EmitAMDGCNImageOverloadedReturnType(
     clang::CodeGen::CodeGenFunction &CGF, const clang::CallExpr *E,
     unsigned IntrinsicID, bool IsImageStore) {
   clang::SmallVector<llvm::Value *> Args;
-  for (unsigned I = 0; I < E->getNumArgs(); ++I)
-    Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));
+  unsigned RsrcIndex = GetTextureDescIndex(E->getBuiltinCallee(), E);
+
+  for (unsigned I = 0; I < E->getNumArgs(); ++I) {
+    llvm::Value *V = CGF.EmitScalarExpr(E->getArg(I));
+    // Hand the intrinsic an <8 x i32> rsrc vector in place of the opaque
+    // texture handle the builtin was called with.
+    if (I == RsrcIndex)
+      V = LoadTextureDescPtorAsVec8I32(CGF, V);
+    Args.push_back(V);
+  }
 
   llvm::Type *RetTy = CGF.ConvertType(E->getType());
   if (IsImageStore)
diff --git a/clang/test/CodeGen/builtins-image-load.c b/clang/test/CodeGen/builtins-image-load.c
index 67548a567723e..8442124416338 100644
--- a/clang/test/CodeGen/builtins-image-load.c
+++ b/clang/test/CodeGen/builtins-image-load.c
@@ -1,1162 +1,1203 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
 // RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 %s -emit-llvm -o - | FileCheck %s
 
-typedef int int8 __attribute__((ext_vector_type(8)));
 typedef int int4 __attribute__((ext_vector_type(4)));
 typedef float float4 __attribute__((ext_vector_type(4)));
 typedef _Float16 half;
 typedef half half4 __attribute__((ext_vector_type(4)));
 
 // CHECK-LABEL: define dso_local float @test_builtin_image_load_2d(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
 // CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT:    [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr
[[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 12, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 106, i32 103) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 12, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 106, i32 103) // CHECK-NEXT: ret float [[TMP3]] // -float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) { +float test_builtin_image_load_2d(float f32, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, vec8i32, 106, 103); + return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, tex, 106, 103); } // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2d_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x float> [[TMP3]] // -float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) { +float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x half> 
@test_builtin_image_load_2d_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x half> [[TMP3]] // -half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) { +half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local float @test_builtin_image_load_2darray( -// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to 
ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret float [[TMP4]] // -float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) { +float test_builtin_image_load_2darray(float f32, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2darray_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) 
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x float> [[TMP4]] // -float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) { +float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2darray_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x half> [[TMP4]] // -half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) { +half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1d_1( -// CHECK-SAME: <4 x 
float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP1]], i32 120, i32 110) +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x float> [[TMP2]] // -float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) { +float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_1d_v4f32_i32(100, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_1d_v4f32_i32(100, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1d_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: 
store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP1]], i32 120, i32 110) +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x half> [[TMP2]] // -half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) { +half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1darray_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x float> [[TMP3]] // -float4 
test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) { +float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1darray_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x half> [[TMP3]] // -half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) { +half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_3d_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 
32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x float> [[TMP4]] // -float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) { +float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_3d_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: 
[[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x half> [[TMP4]] // -half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) { +half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_3d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_3d_v4f16_i32(100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_cube_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x float> [[TMP4]] // -float4 
test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) { +float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_cube_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_cube_v4f32_i32(100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_cube_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x half> [[TMP4]] // -half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) { +half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_cube_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_cube_v4f16_i32(100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1d_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = 
alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x float> [[TMP3]] // -float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) { +float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(100, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(100, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1d_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // 
CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x half> [[TMP3]] // -half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) { +half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1darray_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x float> [[TMP4]] // -float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, 
int8 vec8i32) { +float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1darray_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x half> [[TMP4]] // -half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) { +half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2d( -// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, 
addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.mip.2d.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.mip.2d.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret float [[TMP4]] // -float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) { +float test_builtin_image_load_mip_2d(float f32, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_mip_2d_f32_i32(100, i32, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_mip_2d_f32_i32(100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2d_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr 
[[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x float> [[TMP4]] // -float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) { +float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2d_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> 
[[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x half> [[TMP4]] // -half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) { +half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2darray( -// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.load.mip.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.load.mip.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret float [[TMP5]] // -float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) { +float test_builtin_image_load_mip_2darray(float f32, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_mip_2darray_f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_mip_2darray_f32_i32(100, i32, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2darray_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: 
[[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x float> [[TMP5]] // -float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) { +float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2darray_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x half> [[TMP5]] // -half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) { +half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_3d_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x float> [[TMP5]] // -float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) { +float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(100, i32, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_3d_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x half> [[TMP5]] // -half4 
test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) { +half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(100, i32, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_cube_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x float> [[TMP5]] // -float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) { +float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(100, i32, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_cube_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: 
[[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret <4 x half> [[TMP5]] // -half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) { +half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110); + return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_1d_v4f32_f32( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr 
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32 // CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TMP1]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110) +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110) // CHECK-NEXT: ret <4 x float> [[TMP3]] // -float4 test_builtin_amdgcn_image_sample_1d_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) { - return __builtin_amdgcn_image_sample_1d_v4f32_f32(100, f32, vec8i32, vec4i32, 0, 120, 110); +float4 test_builtin_amdgcn_image_sample_1d_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) { + return __builtin_amdgcn_image_sample_1d_v4f32_f32(100, f32, tex, vec4i32, 0, 120, 110); } // CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_1d_v4f16_f32( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr 
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32 // CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TMP1]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110) +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110) // CHECK-NEXT: ret <4 x half> [[TMP3]] // -half4 test_builtin_amdgcn_image_sample_1d_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) { - return __builtin_amdgcn_image_sample_1d_v4f16_f32(100, f32, vec8i32, vec4i32, 0, 120, 110); +half4 test_builtin_amdgcn_image_sample_1d_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) { + return __builtin_amdgcn_image_sample_1d_v4f16_f32(100, f32, tex, vec4i32, 0, 120, 110); } // CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_1darray_v4f32_f32( -// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr // CHECK-NEXT: store i32 [[I32]], ptr 
[[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) // CHECK-NEXT: ret <4 x float> [[TMP4]] // -float4 test_builtin_amdgcn_image_sample_1darray_v4f32_f32(int i32, float f32, int8 vec8i32, int4 vec4i32) { - return __builtin_amdgcn_image_sample_1darray_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); +float4 test_builtin_amdgcn_image_sample_1darray_v4f32_f32(int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) { + return __builtin_amdgcn_image_sample_1darray_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); } // CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_1darray_v4f16_f32( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store float [[F32]], 
ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) // CHECK-NEXT: ret <4 x half> [[TMP4]] // -half4 test_builtin_amdgcn_image_sample_1darray_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) { - return __builtin_amdgcn_image_sample_1darray_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); +half4 test_builtin_amdgcn_image_sample_1darray_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) { + return __builtin_amdgcn_image_sample_1darray_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); } // CHECK-LABEL: define dso_local float @test_builtin_amdgcn_image_sample_2d_f32_f32( -// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load 
float, ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) // CHECK-NEXT: ret float [[TMP4]] // -float test_builtin_amdgcn_image_sample_2d_f32_f32(int i32, float f32, int8 vec8i32, int4 vec4i32) { - return __builtin_amdgcn_image_sample_2d_f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); +float test_builtin_amdgcn_image_sample_2d_f32_f32(int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) { + return __builtin_amdgcn_image_sample_2d_f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); } // CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_2d_v4f32_f32( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) // CHECK-NEXT: ret <4 x float> [[TMP4]] // -float4 test_builtin_amdgcn_image_sample_2d_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) { - return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); +float4 test_builtin_amdgcn_image_sample_2d_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) { + return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); } // CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_2d_v4f16_f32( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 
32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) // CHECK-NEXT: ret <4 x half> [[TMP4]] // -half4 test_builtin_amdgcn_image_sample_2d_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) { - return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); +half4 test_builtin_amdgcn_image_sample_2d_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) { + return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); } // CHECK-LABEL: define dso_local float @test_builtin_amdgcn_image_sample_2darray_f32_f32( -// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.2darray.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> 
[[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.2darray.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) // CHECK-NEXT: ret float [[TMP5]] // -float test_builtin_amdgcn_image_sample_2darray_f32_f32(int i32, float f32, int8 vec8i32, int4 vec4i32) { - return __builtin_amdgcn_image_sample_2darray_f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); +float test_builtin_amdgcn_image_sample_2darray_f32_f32(int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) { + return __builtin_amdgcn_image_sample_2darray_f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); } // CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_2darray_v4f32_f32( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float 
[[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) // CHECK-NEXT: ret <4 x float> [[TMP5]] // -float4 test_builtin_amdgcn_image_sample_2darray_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) { - return __builtin_amdgcn_image_sample_2darray_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); +float4 test_builtin_amdgcn_image_sample_2darray_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) { + return __builtin_amdgcn_image_sample_2darray_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); } // CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_2darray_v4f16_f32( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> 
@llvm.amdgcn.image.sample.2darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) // CHECK-NEXT: ret <4 x half> [[TMP5]] // -half4 test_builtin_amdgcn_image_sample_2darray_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) { - return __builtin_amdgcn_image_sample_2darray_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); +half4 test_builtin_amdgcn_image_sample_2darray_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) { + return __builtin_amdgcn_image_sample_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); } // CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_3d_v4f32_f32( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr 
[[VEC4I32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) // CHECK-NEXT: ret <4 x float> [[TMP5]] // -float4 test_builtin_amdgcn_image_sample_3d_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) { - return __builtin_amdgcn_image_sample_3d_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); +float4 test_builtin_amdgcn_image_sample_3d_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) { + return __builtin_amdgcn_image_sample_3d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); } // CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_3d_v4f16_f32( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 // 
CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.3d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.3d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) // CHECK-NEXT: ret <4 x half> [[TMP5]] // -half4 test_builtin_amdgcn_image_sample_3d_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) { - return __builtin_amdgcn_image_sample_3d_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); +half4 test_builtin_amdgcn_image_sample_3d_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) { + return __builtin_amdgcn_image_sample_3d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); } // CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_cube_v4f32_f32( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: 
[[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) // CHECK-NEXT: ret <4 x float> [[TMP5]] // -float4 test_builtin_amdgcn_image_sample_cube_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) { - return __builtin_amdgcn_image_sample_cube_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); +float4 test_builtin_amdgcn_image_sample_cube_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) { + return __builtin_amdgcn_image_sample_cube_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); } // CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_cube_v4f16_f32( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = 
load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.cube.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.cube.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) // CHECK-NEXT: ret <4 x half> [[TMP5]] // -half4 test_builtin_amdgcn_image_sample_cube_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) { - return __builtin_amdgcn_image_sample_cube_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); +half4 test_builtin_amdgcn_image_sample_cube_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) { + return __builtin_amdgcn_image_sample_cube_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); } diff --git a/clang/test/CodeGen/builtins-image-store.c b/clang/test/CodeGen/builtins-image-store.c index cd2b09e074c59..5309a16df7033 100644 --- a/clang/test/CodeGen/builtins-image-store.c +++ b/clang/test/CodeGen/builtins-image-store.c @@ -1,703 +1,730 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 %s -emit-llvm -o - | FileCheck %s -typedef int int8 __attribute__((ext_vector_type(8))); typedef float float4 __attribute__((ext_vector_type(4))); typedef _Float16 half; typedef half half4 __attribute__((ext_vector_type(4))); // CHECK-LABEL: define dso_local void @test_builtin_image_store_2d( -// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.f32.i32.v8i32(float [[TMP0]], i32 12, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 106, i32 103) +// 
CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.f32.i32.v8i32(float [[TMP0]], i32 12, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 106, i32 103) // CHECK-NEXT: ret void // -void test_builtin_image_store_2d(float f32, int i32, int8 vec8i32) { +void test_builtin_image_store_2d(float f32, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_2d_f32_i32(f32, 12, i32, i32, vec8i32, 106, 103); + __builtin_amdgcn_image_store_2d_f32_i32(f32, 12, i32, i32, tex, 106, 103); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_2d_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_2d_1(float4 v4f32, int i32, int8 vec8i32) { +void test_builtin_image_store_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_2d_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: 
[[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_2d_2(half4 v4f16, int i32, int8 vec8i32) { +void test_builtin_image_store_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray( -// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_2darray(float f32, int i32, int8 vec8i32) { +void test_builtin_image_store_2darray(float f32, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_2darray_f32_i32(f32, 100, i32, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_2darray_f32_i32(f32, 100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_2darray_1(float4 v4f32, int i32, int8 vec8i32) { +void test_builtin_image_store_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110); 
+ __builtin_amdgcn_image_store_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_2darray_2(half4 v4f16, int i32, int8 vec8i32) { +void test_builtin_image_store_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_1d_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: 
[[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_1d_1(float4 v4f32, int i32, int8 vec8i32) { +void test_builtin_image_store_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_1d_v4f32_i32(v4f32, 100, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_1d_v4f32_i32(v4f32, 100, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_1d_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: 
ret void // -void test_builtin_image_store_1d_2(half4 v4f16, int i32, int8 vec8i32) { +void test_builtin_image_store_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_1d_v4f16_i32(v4f16, 100, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_1d_v4f16_i32(v4f16, 100, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_1darray_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_1darray_1(float4 v4f32, int i32, int8 vec8i32) { +void test_builtin_image_store_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_1darray_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_1darray_v4f32_i32(v4f32, 100, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_1darray_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] 
to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_1darray_2(half4 v4f16, int i32, int8 vec8i32) { +void test_builtin_image_store_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_1darray_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_1darray_v4f16_i32(v4f16, 100, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_3d_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f32.i32.v8i32(<4 x 
float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_3d_1(float4 v4f32, int i32, int8 vec8i32) { +void test_builtin_image_store_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_3d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_3d_v4f32_i32(v4f32, 100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_3d_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_3d_2(half4 v4f16, int i32, int8 vec8i32) { +void test_builtin_image_store_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_3d_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_3d_v4f16_i32(v4f16, 100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_cube_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef 
[[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_cube_1(float4 v4f32, int i32, int8 vec8i32) { +void test_builtin_image_store_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_cube_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_cube_v4f32_i32(v4f32, 100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_cube_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr 
[[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_cube_2(half4 v4f16, int i32, int8 vec8i32) { +void test_builtin_image_store_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_cube_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_cube_v4f16_i32(v4f16, 100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1d_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x 
i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) { +void test_builtin_image_store_mip_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_mip_1d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_mip_1d_v4f32_i32(v4f32, 100, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1d_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) { +void test_builtin_image_store_mip_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_mip_1d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_mip_1d_v4f16_i32(v4f16, 100, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1darray_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: 
[[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) { +void test_builtin_image_store_mip_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_mip_1darray_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_mip_1darray_v4f32_i32(v4f32, 100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1darray_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) { +void test_builtin_image_store_mip_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_mip_1darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_mip_1darray_v4f16_i32(v4f16, 100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d( -// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_mip_2d(float f32, int i32, int8 vec8i32) { +void test_builtin_image_store_mip_2d(float f32, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_mip_2d_f32_i32(f32, 100, i32, i32, i32, vec8i32, 120, 110); + 
__builtin_amdgcn_image_store_mip_2d_f32_i32(f32, 100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) { +void test_builtin_image_store_mip_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_mip_2d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_mip_2d_v4f32_i32(v4f32, 100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: 
[[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) { +void test_builtin_image_store_mip_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_mip_2d_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_mip_2d_v4f16_i32(v4f16, 100, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray( -// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void 
@llvm.amdgcn.image.store.mip.2darray.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110) +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_mip_2darray(float f32, int i32, int8 vec8i32) { +void test_builtin_image_store_mip_2darray(float f32, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_mip_2darray_f32_i32(f32, 100, i32, i32, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_mip_2darray_f32_i32(f32, 100, i32, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110) +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) { +void test_builtin_image_store_mip_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_mip_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, 
i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_mip_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110) +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) { +void test_builtin_image_store_mip_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_mip_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_mip_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_3d_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = 
addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110) +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) { +void test_builtin_image_store_mip_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_mip_3d_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_mip_3d_v4f32_i32(v4f32, 100, i32, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_3d_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.3d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110) +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.3d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) { +void test_builtin_image_store_mip_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_mip_3d_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_mip_3d_v4f16_i32(v4f16, 100, i32, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_cube_1( -// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110) +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // 
CHECK-NEXT: ret void // -void test_builtin_image_store_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) { +void test_builtin_image_store_mip_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_mip_cube_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_mip_cube_v4f32_i32(v4f32, 100, i32, i32, i32, i32, tex, 120, 110); } // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_cube_2( -// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) // CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr // CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.cube.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110) +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32 +// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.cube.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110) // CHECK-NEXT: ret void // -void test_builtin_image_store_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) { +void test_builtin_image_store_mip_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { - __builtin_amdgcn_image_store_mip_cube_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110); + __builtin_amdgcn_image_store_mip_cube_v4f16_i32(v4f16, 100, i32, i32, i32, i32, tex, 120, 110); } diff --git a/clang/test/SemaOpenCL/builtins-image-load-param-gfx1100.cl b/clang/test/SemaOpenCL/builtins-image-load-param-gfx1100.cl new file mode 100644 index 0000000000000..8f609dcbd34f2 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-image-load-param-gfx1100.cl @@ -0,0 +1,194 @@ +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -S -verify=expected -o - %s +// REQUIRES: amdgpu-registered-target + +typedef int int4 
__attribute__((ext_vector_type(4))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef half half4 __attribute__((ext_vector_type(4))); + +float test_builtin_image_load_2d(float f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, i32, tex, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_f32_i32' must be a constant integer}} +} +float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f16_i32' must be a constant integer}} +} + + +float test_builtin_image_load_2darray(float f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_f32_i32' must be a constant integer}} +} +float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_1d_v4f32_i32(i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_3d_v4f16_i32(i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f16_i32' must be a constant integer}} +} + +float4 
test_builtin_image_load_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_cube_v4f32_i32(i32, i32, i32, i32, tex, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_cube_v4f16_i32(i32, i32, i32, i32, tex, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(i32, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f16_i32' must be a constant integer}} +} + +float test_builtin_image_load_mip_2d(float f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_2d_f32_i32(i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_f32_i32' must be a constant integer}} +} +float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(i32, i32, i32, i32, tex, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f16_i32' must be a constant integer}} +} + +float test_builtin_image_load_mip_2darray(float f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_2darray_f32_i32(i32, i32, i32, i32, i32, tex, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_f32_i32' must be a constant integer}} +} +float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f16_i32' must be a constant integer}} +} + +float4 
test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(i32, i32, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(i32, i32, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f16_i32' must be a constant integer}} +} + +float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(i32, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f32_i32' must be a constant integer}} +} +half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f16_i32' must be a constant integer}} +} + +float test_builtin_image_sample_2d(float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2d_f32_f32(i32, f32, f32, tex, vec4i32, 0, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_f32_f32' must be a constant integer}} +} +float4 test_builtin_image_sample_2d_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_v4f32_f32' must be a constant integer}} +} +half4 test_builtin_image_sample_2d_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_v4f16_f32' must be a constant integer}} +} + +float test_builtin_image_sample_2darray(float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2darray_f32_f32(100, f32, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_f32_f32' must be a constant integer}} +} +float4 test_builtin_image_sample_2darray_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2darray_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_v4f32_f32' must be a constant integer}} +} +half4 test_builtin_image_sample_2darray_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_v4f16_f32' must be a constant integer}} +} + +float4 test_builtin_image_sample_1d_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_1d_v4f32_f32(i32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_1d_v4f32_f32' must be a constant integer}} +} +half4 test_builtin_image_sample_1d_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return 
__builtin_amdgcn_image_sample_1d_v4f16_f32(100, f32, tex, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_1d_v4f16_f32' must be a constant integer}} +} + +float4 test_builtin_image_sample_1darray_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_1darray_v4f32_f32(100, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_1darray_v4f32_f32' must be a constant integer}} +} +half4 test_builtin_image_sample_1darray_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_1darray_v4f16_f32(100, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_1darray_v4f16_f32' must be a constant integer}} +} + +float4 test_builtin_image_sample_3d_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_3d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_3d_v4f32_f32' must be a constant integer}} +} +half4 test_builtin_image_sample_3d_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_3d_v4f16_f32(i32, f32, f32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_3d_v4f16_f32' must be a constant integer}} +} + +float4 test_builtin_image_sample_cube_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_cube_v4f32_f32(i32, f32, f32, f32, tex, vec4i32, 0, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_cube_v4f32_f32' must be a constant integer}} +} +half4 test_builtin_image_sample_cube_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_cube_v4f16_f32(i32, f32, f32, f32, tex, vec4i32, 0, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_cube_v4f16_f32' must be a constant integer}} +} + diff --git a/clang/test/SemaOpenCL/builtins-image-load-param-gfx942.cl b/clang/test/SemaOpenCL/builtins-image-load-param-gfx942.cl new file mode 100644 index 0000000000000..b8780024f1076 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-image-load-param-gfx942.cl @@ -0,0 +1,219 @@ +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -verify -S -o - %s +// REQUIRES: amdgpu-registered-target + +typedef int int4 __attribute__((ext_vector_type(4))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef half half4 __attribute__((ext_vector_type(4))); + +float test_builtin_image_load_2d(float f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, i32, tex, 106, 103); //expected-error{{'test_builtin_image_load_2d' needs target feature image-insts}} +} +float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_load_2d_1' needs target feature image-insts}} +} +half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_2d_2' needs target feature image-insts}} +} + + +float test_builtin_image_load_2darray(float f32, int i32, 
__amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_load_2darray' needs target feature image-insts}} +} +float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_load_2darray_1' needs target feature image-insts}} +} +half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_2darray_2' needs target feature image-insts}} +} + +float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_1d_v4f32_i32(i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_1d_1' needs target feature image-insts}} +} +half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_1d_2' needs target feature image-insts}} +} + +float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_load_1darray_1' needs target feature image-insts}} +} +half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_load_1darray_2' needs target feature image-insts}} +} + +float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_3d_1' needs target feature image-insts}} +} +half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_3d_v4f16_i32(i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_3d_2' needs target feature image-insts}} +} + +float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_cube_v4f32_i32(i32, i32, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_load_cube_1' needs target feature image-insts}} +} +half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_cube_v4f16_i32(i32, i32, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_load_cube_2' needs target feature image-insts}} +} + +float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_mip_1d_1' needs target feature image-insts}} +} +half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_mip_1d_2' needs target feature image-insts}} +} + +float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return 
__builtin_amdgcn_image_load_mip_1darray_v4f32_i32(i32, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_load_mip_1darray_1' needs target feature image-insts}} +} +half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_load_mip_1darray_2' needs target feature image-insts}} +} + +float test_builtin_image_load_mip_2d(float f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_2d_f32_i32(i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_mip_2d' needs target feature image-insts}} +} +float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_mip_2d_1' needs target feature image-insts}} +} +half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(i32, i32, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_load_mip_2d_2' needs target feature image-insts}} +} + +float test_builtin_image_load_mip_2darray(float f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_2darray_f32_i32(i32, i32, i32, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_load_mip_2darray' needs target feature image-insts}} +} +float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_mip_2darray_1' needs target feature image-insts}} +} +half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_mip_2darray_2' needs target feature image-insts}} +} + +float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(i32, i32, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_load_mip_3d_1' needs target feature image-insts}} +} +half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(i32, i32, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_load_mip_3d_2' needs target feature image-insts}} +} + +float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(i32, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_mip_cube_1' needs target feature image-insts}} +} +half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_mip_cube_2' needs target feature image-insts}} +} + +float test_builtin_image_load_2d_gfx(float f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, tex, 106, 103); //expected-error{{'test_builtin_image_load_2d_gfx' needs target feature image-insts}} +} +float4 
test_builtin_image_load_2d_gfx_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_load_2d_gfx_1' needs target feature image-insts}} +} +half4 test_builtin_image_load_2d_gfx_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_load_2d_gfx_2' needs target feature image-insts}} +} + +float test_builtin_image_sample_2d(float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2d_f32_f32(i32, f32, f32, tex, vec4i32, 0, 106, 103); //expected-error{{'test_builtin_image_sample_2d' needs target feature image-insts}} +} +float4 test_builtin_image_sample_2d_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{'test_builtin_image_sample_2d_1' needs target feature image-insts}} +} +half4 test_builtin_image_sample_2d_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{'test_builtin_image_sample_2d_2' needs target feature image-insts}} +} + +float test_builtin_image_sample_2darray(float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2darray_f32_f32(100, f32, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{'test_builtin_image_sample_2darray' needs target feature image-insts}} +} +float4 test_builtin_image_sample_2darray_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2darray_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{'test_builtin_image_sample_2darray_1' needs target feature image-insts}} +} +half4 test_builtin_image_sample_2darray_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{'test_builtin_image_sample_2darray_2' needs target feature image-insts}} +} + +float4 test_builtin_image_sample_1d_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_1d_v4f32_f32(i32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{'test_builtin_image_sample_1d_1' needs target feature image-insts}} +} +half4 test_builtin_image_sample_1d_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_1d_v4f16_f32(100, f32, tex, vec4i32, 0, 120, i32); //expected-error{{'test_builtin_image_sample_1d_2' needs target feature image-insts}} +} + +float4 test_builtin_image_sample_1darray_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_1darray_v4f32_f32(100, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{'test_builtin_image_sample_1darray_1' needs target feature image-insts}} +} +half4 test_builtin_image_sample_1darray_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_1darray_v4f16_f32(100, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{'test_builtin_image_sample_1darray_2' needs target feature image-insts}} +} + 
+float4 test_builtin_image_sample_3d_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_3d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{'test_builtin_image_sample_3d_1' needs target feature image-insts}} +} +half4 test_builtin_image_sample_3d_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_3d_v4f16_f32(i32, f32, f32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{'test_builtin_image_sample_3d_2' needs target feature image-insts}} +} + +float4 test_builtin_image_sample_cube_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_cube_v4f32_f32(i32, f32, f32, f32, tex, vec4i32, 0, 120, 110); //expected-error{{'test_builtin_image_sample_cube_1' needs target feature image-insts}} +} +half4 test_builtin_image_sample_cube_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_cube_v4f16_f32(i32, f32, f32, f32, tex, vec4i32, 0, 120, 110); //expected-error{{'test_builtin_image_sample_cube_2' needs target feature image-insts}} +} + +float test_builtin_image_sample_2d_gfx(float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2d_f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); //expected-error{{'test_builtin_image_sample_2d_gfx' needs target feature image-insts}} +} +float4 test_builtin_image_sample_2d_gfx_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); //expected-error{{'test_builtin_image_sample_2d_gfx_1' needs target feature image-insts}} +} +half4 test_builtin_image_sample_2d_gfx_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); //expected-error{{'test_builtin_image_sample_2d_gfx_2' needs target feature image-insts}} +} diff --git a/clang/test/SemaOpenCL/builtins-image-load-param.cl b/clang/test/SemaOpenCL/builtins-image-load-param.cl deleted file mode 100644 index 0da4831e847f7..0000000000000 --- a/clang/test/SemaOpenCL/builtins-image-load-param.cl +++ /dev/null @@ -1,222 +0,0 @@ -// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -S -verify=expected -o - %s -// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -S -verify=expected -o - %s -// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -S -verify=GFX94 -o - %s -// REQUIRES: amdgpu-registered-target - -typedef int int8 __attribute__((ext_vector_type(8))); -typedef int int4 __attribute__((ext_vector_type(4))); -typedef float float4 __attribute__((ext_vector_type(4))); -typedef half half4 __attribute__((ext_vector_type(4))); - -float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, i32, vec8i32, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_f32_i32' must be a constant integer}} -} -float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) { - - return 
__builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f16_i32' must be a constant integer}} -} - - -float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_f32_i32' must be a constant integer}} -} -float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f16_i32' must be a constant integer}} -} - -float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_1d_v4f32_i32(i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f16_i32' must be a constant integer}} -} - -float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f16_i32' must be a constant integer}} -} - -float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_3d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f16_i32' must be a constant integer}} -} - -float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_cube_v4f32_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_cube_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f16_i32' must be a constant integer}} -} - -float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f32_i32' must be a constant integer}} -} -half4 
test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f16_i32' must be a constant integer}} -} - -float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f16_i32' must be a constant integer}} -} - -float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_2d_f32_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_f32_i32' must be a constant integer}} -} -float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f16_i32' must be a constant integer}} -} - -float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_2darray_f32_i32(i32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_f32_i32' must be a constant integer}} -} -float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f16_i32' must be a constant integer}} -} - -float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f16_i32' must be a constant integer}} -} - -float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(i32, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f32_i32' must be a constant integer}} -} -half4 test_builtin_image_load_mip_cube_2(half4 v4f16, 
int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f16_i32' must be a constant integer}} -} - -float test_builtin_image_load_2d_gfx(float f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, vec8i32, 106, 103); //GFX94-error{{'__builtin_amdgcn_image_load_2d_f32_i32' needs target feature image-insts}} -} -float4 test_builtin_image_load_2d_gfx_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_load_2d_v4f32_i32' needs target feature image-insts}} -} -half4 test_builtin_image_load_2d_gfx_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_load_2d_v4f16_i32' needs target feature image-insts}} -} - -float test_builtin_image_sample_2d(float f32, int i32, int8 vec8i32, int4 vec4i32) { - - return __builtin_amdgcn_image_sample_2d_f32_f32(i32, f32, f32, vec8i32, vec4i32, 0, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_f32_f32' must be a constant integer}} -} -float4 test_builtin_image_sample_2d_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) { - - return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_v4f32_f32' must be a constant integer}} -} -half4 test_builtin_image_sample_2d_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) { - - return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_v4f16_f32' must be a constant integer}} -} - -float test_builtin_image_sample_2darray(float f32, int i32, int8 vec8i32, int4 vec4i32) { - - return __builtin_amdgcn_image_sample_2darray_f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_f32_f32' must be a constant integer}} -} -float4 test_builtin_image_sample_2darray_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) { - - return __builtin_amdgcn_image_sample_2darray_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_v4f32_f32' must be a constant integer}} -} -half4 test_builtin_image_sample_2darray_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) { - - return __builtin_amdgcn_image_sample_2darray_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_v4f16_f32' must be a constant integer}} -} - -float4 test_builtin_image_sample_1d_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) { - - return __builtin_amdgcn_image_sample_1d_v4f32_f32(i32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_1d_v4f32_f32' must be a constant integer}} -} -half4 test_builtin_image_sample_1d_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) { - - return __builtin_amdgcn_image_sample_1d_v4f16_f32(100, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_1d_v4f16_f32' must be a constant integer}} -} - -float4 
test_builtin_image_sample_1darray_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) { - - return __builtin_amdgcn_image_sample_1darray_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_1darray_v4f32_f32' must be a constant integer}} -} -half4 test_builtin_image_sample_1darray_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) { - - return __builtin_amdgcn_image_sample_1darray_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_1darray_v4f16_f32' must be a constant integer}} -} - -float4 test_builtin_image_sample_3d_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) { - - return __builtin_amdgcn_image_sample_3d_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_3d_v4f32_f32' must be a constant integer}} -} -half4 test_builtin_image_sample_3d_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) { - - return __builtin_amdgcn_image_sample_3d_v4f16_f32(i32, f32, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_3d_v4f16_f32' must be a constant integer}} -} - -float4 test_builtin_image_sample_cube_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) { - - return __builtin_amdgcn_image_sample_cube_v4f32_f32(i32, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_cube_v4f32_f32' must be a constant integer}} -} -half4 test_builtin_image_sample_cube_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) { - - return __builtin_amdgcn_image_sample_cube_v4f16_f32(i32, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_cube_v4f16_f32' must be a constant integer}} -} - -float test_builtin_image_sample_2d_gfx(float f32, int i32, int8 vec8i32, int4 vec4i32) { - - return __builtin_amdgcn_image_sample_2d_f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_sample_2d_f32_f32' needs target feature image-insts}} -} -float4 test_builtin_image_sample_2d_gfx_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) { - - return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_sample_2d_v4f32_f32' needs target feature image-insts}} -} -half4 test_builtin_image_sample_2d_gfx_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) { - - return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_sample_2d_v4f16_f32' needs target feature image-insts}} -} diff --git a/clang/test/SemaOpenCL/builtins-image-store-param-gfx1100.cl b/clang/test/SemaOpenCL/builtins-image-store-param-gfx1100.cl new file mode 100644 index 0000000000000..4f6347e1c5286 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-image-store-param-gfx1100.cl @@ -0,0 +1,129 @@ +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -S -verify=expected -o - %s +// REQUIRES: amdgpu-registered-target + +typedef float float4 __attribute__((ext_vector_type(4))); +typedef half half4 __attribute__((ext_vector_type(4))); + +void test_builtin_image_store_2d(float f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_2d_f32_i32(f32, i32, i32, i32, tex, 106, 103); //expected-error{{argument to 
'__builtin_amdgcn_image_store_2d_f32_i32' must be a constant integer}} +} +void test_builtin_image_store_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_2darray(float f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_2darray_f32_i32(f32, 100, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_f32_i32' must be a constant integer}} +} +void test_builtin_image_store_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_1d_v4f32_i32(v4f32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_1d_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_1d_v4f16_i32(v4f16, 100, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_1d_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_1darray_v4f32_i32(v4f32, 100, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_1darray_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_1darray_v4f16_i32(v4f16, 100, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_1darray_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_3d_v4f32_i32(v4f32, 100, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_3d_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_3d_v4f16_i32(v4f16, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_3d_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_cube_v4f32_i32(v4f32, i32, i32, i32, i32, tex, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_cube_v4f32_i32' must be a constant integer}} +} 
+void test_builtin_image_store_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_cube_v4f16_i32(v4f16, i32, i32, i32, i32, tex, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_cube_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_mip_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_1d_v4f32_i32(v4f32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1d_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_mip_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_1d_v4f16_i32(v4f16, 100, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1d_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_mip_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_1darray_v4f32_i32(v4f32, i32, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1darray_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_mip_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_1darray_v4f16_i32(v4f16, 100, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1darray_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_mip_2d(float f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_2d_f32_i32(f32, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_f32_i32' must be a constant integer}} +} +void test_builtin_image_store_mip_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_2d_v4f32_i32(v4f32, 100, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_mip_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_2d_v4f16_i32(v4f16, i32, i32, i32, i32, tex, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_mip_2darray(float f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_2darray_f32_i32(f32, i32, i32, i32, i32, i32, tex, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_f32_i32' must be a constant integer}} +} +void test_builtin_image_store_mip_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_mip_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_mip_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_3d_v4f32_i32(v4f32, i32, i32, i32, i32, i32, tex, i32, 110); 
//expected-error{{argument to '__builtin_amdgcn_image_store_mip_3d_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_mip_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_3d_v4f16_i32(v4f16, i32, i32, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_3d_v4f16_i32' must be a constant integer}} +} + +void test_builtin_image_store_mip_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_cube_v4f32_i32(v4f32, i32, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_cube_v4f32_i32' must be a constant integer}} +} +void test_builtin_image_store_mip_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_cube_v4f16_i32(v4f16, 100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_cube_v4f16_i32' must be a constant integer}} +} diff --git a/clang/test/SemaOpenCL/builtins-image-store-param-gfx942.cl b/clang/test/SemaOpenCL/builtins-image-store-param-gfx942.cl new file mode 100644 index 0000000000000..d0085e5403b5f --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-image-store-param-gfx942.cl @@ -0,0 +1,129 @@ +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -S -verify -o - %s +// REQUIRES: amdgpu-registered-target + +typedef float float4 __attribute__((ext_vector_type(4))); +typedef half half4 __attribute__((ext_vector_type(4))); + +void test_builtin_image_store_2d(float f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_2d_f32_i32(f32, i32, i32, i32, tex, 106, 103); //expected-error{{'test_builtin_image_store_2d' needs target feature image-insts}} +} +void test_builtin_image_store_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_store_2d_1' needs target feature image-insts}} +} +void test_builtin_image_store_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_2d_2' needs target feature image-insts}} +} + +void test_builtin_image_store_2darray(float f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_2darray_f32_i32(f32, 100, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_store_2darray' needs target feature image-insts}} +} +void test_builtin_image_store_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_store_2darray_1' needs target feature image-insts}} +} +void test_builtin_image_store_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_2darray_2' needs target feature image-insts}} +} + +void test_builtin_image_store_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_1d_v4f32_i32(v4f32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_1d_1' needs target feature image-insts}} +} +void test_builtin_image_store_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return 
__builtin_amdgcn_image_store_1d_v4f16_i32(v4f16, 100, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_1d_2' needs target feature image-insts}} +} + +void test_builtin_image_store_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_1darray_v4f32_i32(v4f32, 100, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_store_1darray_1' needs target feature image-insts}} +} +void test_builtin_image_store_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_1darray_v4f16_i32(v4f16, 100, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_store_1darray_2' needs target feature image-insts}} +} + +void test_builtin_image_store_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_3d_v4f32_i32(v4f32, 100, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_3d_1' needs target feature image-insts}} +} +void test_builtin_image_store_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_3d_v4f16_i32(v4f16, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_3d_2' needs target feature image-insts}} +} + +void test_builtin_image_store_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_cube_v4f32_i32(v4f32, i32, i32, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_store_cube_1' needs target feature image-insts}} +} +void test_builtin_image_store_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_cube_v4f16_i32(v4f16, i32, i32, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_store_cube_2' needs target feature image-insts}} +} + +void test_builtin_image_store_mip_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_1d_v4f32_i32(v4f32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_mip_1d_1' needs target feature image-insts}} +} +void test_builtin_image_store_mip_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_1d_v4f16_i32(v4f16, 100, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_mip_1d_2' needs target feature image-insts}} +} + +void test_builtin_image_store_mip_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_1darray_v4f32_i32(v4f32, i32, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_store_mip_1darray_1' needs target feature image-insts}} +} +void test_builtin_image_store_mip_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_1darray_v4f16_i32(v4f16, 100, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_store_mip_1darray_2' needs target feature image-insts}} +} + +void test_builtin_image_store_mip_2d(float f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_2d_f32_i32(f32, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_mip_2d' needs target feature image-insts}} +} +void test_builtin_image_store_mip_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_2d_v4f32_i32(v4f32, 100, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_mip_2d_1' needs target feature image-insts}} +} +void 
test_builtin_image_store_mip_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_2d_v4f16_i32(v4f16, i32, i32, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_store_mip_2d_2' needs target feature image-insts}} +} + +void test_builtin_image_store_mip_2darray(float f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_2darray_f32_i32(f32, i32, i32, i32, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_store_mip_2darray' needs target feature image-insts}} +} +void test_builtin_image_store_mip_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_mip_2darray_1' needs target feature image-insts}} +} +void test_builtin_image_store_mip_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_mip_2darray_2' needs target feature image-insts}} +} + +void test_builtin_image_store_mip_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_3d_v4f32_i32(v4f32, i32, i32, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_store_mip_3d_1' needs target feature image-insts}} +} +void test_builtin_image_store_mip_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_3d_v4f16_i32(v4f16, i32, i32, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_store_mip_3d_2' needs target feature image-insts}} +} + +void test_builtin_image_store_mip_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_cube_v4f32_i32(v4f32, i32, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_mip_cube_1' needs target feature image-insts}} +} +void test_builtin_image_store_mip_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) { + + return __builtin_amdgcn_image_store_mip_cube_v4f16_i32(v4f16, 100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_mip_cube_2' needs target feature image-insts}} +} diff --git a/clang/test/SemaOpenCL/builtins-image-store-param.cl b/clang/test/SemaOpenCL/builtins-image-store-param.cl deleted file mode 100644 index f84df77171098..0000000000000 --- a/clang/test/SemaOpenCL/builtins-image-store-param.cl +++ /dev/null @@ -1,145 +0,0 @@ -// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -S -verify=expected -o - %s -// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -S -verify=GFX94 -o - %s -// REQUIRES: amdgpu-registered-target - -typedef int int8 __attribute__((ext_vector_type(8))); -typedef float float4 __attribute__((ext_vector_type(4))); -typedef half half4 __attribute__((ext_vector_type(4))); - - -void test_builtin_image_store_2d(float f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_2d_f32_i32(f32, i32, i32, i32, vec8i32, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_f32_i32' must be a constant integer}} -} -void test_builtin_image_store_2d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_v4f32_i32' must be a constant integer}} -} -void 
test_builtin_image_store_2d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_v4f16_i32' must be a constant integer}} -} - -void test_builtin_image_store_2darray(float f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_2darray_f32_i32(f32, 100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_f32_i32' must be a constant integer}} -} -void test_builtin_image_store_2darray_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_v4f32_i32' must be a constant integer}} -} -void test_builtin_image_store_2darray_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_v4f16_i32' must be a constant integer}} -} - -void test_builtin_image_store_1d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_1d_v4f32_i32(v4f32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_1d_v4f32_i32' must be a constant integer}} -} -void test_builtin_image_store_1d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_1d_v4f16_i32(v4f16, 100, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_1d_v4f16_i32' must be a constant integer}} -} - -void test_builtin_image_store_1darray_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_1darray_v4f32_i32(v4f32, 100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_1darray_v4f32_i32' must be a constant integer}} -} -void test_builtin_image_store_1darray_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_1darray_v4f16_i32(v4f16, 100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_1darray_v4f16_i32' must be a constant integer}} -} - -void test_builtin_image_store_3d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_3d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_3d_v4f32_i32' must be a constant integer}} -} -void test_builtin_image_store_3d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_3d_v4f16_i32(v4f16, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_3d_v4f16_i32' must be a constant integer}} -} - -void test_builtin_image_store_cube_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_cube_v4f32_i32(v4f32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_cube_v4f32_i32' must be a constant integer}} -} -void test_builtin_image_store_cube_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_cube_v4f16_i32(v4f16, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_cube_v4f16_i32' must be a constant integer}} -} - -void test_builtin_image_store_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) { - - return 
__builtin_amdgcn_image_store_mip_1d_v4f32_i32(v4f32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1d_v4f32_i32' must be a constant integer}} -} -void test_builtin_image_store_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_mip_1d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1d_v4f16_i32' must be a constant integer}} -} - -void test_builtin_image_store_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_mip_1darray_v4f32_i32(v4f32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1darray_v4f32_i32' must be a constant integer}} -} -void test_builtin_image_store_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_mip_1darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1darray_v4f16_i32' must be a constant integer}} -} - -void test_builtin_image_store_mip_2d(float f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_mip_2d_f32_i32(f32, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_f32_i32' must be a constant integer}} -} -void test_builtin_image_store_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_mip_2d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_v4f32_i32' must be a constant integer}} -} -void test_builtin_image_store_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_mip_2d_v4f16_i32(v4f16, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_v4f16_i32' must be a constant integer}} -} - -void test_builtin_image_store_mip_2darray(float f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_mip_2darray_f32_i32(f32, i32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_f32_i32' must be a constant integer}} -} -void test_builtin_image_store_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_mip_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_v4f32_i32' must be a constant integer}} -} -void test_builtin_image_store_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_mip_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_v4f16_i32' must be a constant integer}} -} - -void test_builtin_image_store_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_mip_3d_v4f32_i32(v4f32, i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_3d_v4f32_i32' must be a constant integer}} -} -void test_builtin_image_store_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_mip_3d_v4f16_i32(v4f16, i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_3d_v4f16_i32' must be a constant integer}} -} - -void test_builtin_image_store_mip_cube_1(float4 v4f32, 
int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_mip_cube_v4f32_i32(v4f32, i32, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_cube_v4f32_i32' must be a constant integer}} -} -void test_builtin_image_store_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) { - - return __builtin_amdgcn_image_store_mip_cube_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_cube_v4f16_i32' must be a constant integer}} -} - -void test_builtin_image_store_2d_gfx(float f32, int i32, int8 vec8i32) { - - __builtin_amdgcn_image_store_2d_f32_i32(f32, 12, i32, i32, vec8i32, 106, 103); //GFX94-error{{'__builtin_amdgcn_image_store_2d_f32_i32' needs target feature image-insts}} -} -void test_builtin_image_store_2d_gfx_1(float4 v4f32, int i32, int8 vec8i32) { - - __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_store_2d_v4f32_i32' needs target feature image-insts}} - } - void test_builtin_image_store_2d_gfx_2(half4 v4f16, int i32, int8 vec8i32) { - - __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_store_2d_v4f16_i32' needs target feature image-insts}} - } From 44e69e17d9703c9dace2d40d0a051692a8900577 Mon Sep 17 00:00:00 2001 From: ranapratap55 Date: Tue, 7 Oct 2025 12:29:31 +0530 Subject: [PATCH 7/8] update clang format --- clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 87 +++++++++++---------- clang/lib/Sema/SemaAMDGPU.cpp | 28 ++++--- 2 files changed, 59 insertions(+), 56 deletions(-) diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 84119b7b4a768..162ca0d331e61 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#include "CodeGenFunction.h" #include "CGBuiltin.h" +#include "CodeGenFunction.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "llvm/Analysis/ValueTracking.h" @@ -217,8 +217,7 @@ static llvm::Value *LoadTextureDescPtorAsVec8I32(CodeGenFunction &CGF, if (RsrcPtr->getType()->isIntegerTy(32)) { unsigned AS = 8; llvm::PointerType *VecPtrTy = llvm::PointerType::get(VecTy, AS); - llvm::Value *Ptr = - B.CreateIntToPtr(RsrcPtr, VecPtrTy, "tex.rsrc.from.int"); + llvm::Value *Ptr = B.CreateIntToPtr(RsrcPtr, VecPtrTy, "tex.rsrc.from.int"); return B.CreateAlignedLoad(VecTy, Ptr, llvm::Align(32), "tex.rsrc.val"); } @@ -240,22 +239,24 @@ static llvm::Value *LoadTextureDescPtorAsVec8I32(CodeGenFunction &CGF, static unsigned GetTextureDescIndex(unsigned BuiltinID, const CallExpr *E) { unsigned N = E->getNumArgs(); if (IsImageSampleBuiltIn(BuiltinID)) { - if (N < 5) return (unsigned)-1; + if (N < 5) + return (unsigned)-1; return N - 5; } - - if (N < 3) return (unsigned)-1; + + if (N < 3) + return (unsigned)-1; return N - 3; } -llvm::CallInst *EmitAMDGCNImageOverloadedReturnType(clang::CodeGen::CodeGenFunction &CGF, - const clang::CallExpr *E, - unsigned IntrinsicID, - bool IsImageStore) { - clang::SmallVector Args; +llvm::CallInst * +EmitAMDGCNImageOverloadedReturnType(clang::CodeGen::CodeGenFunction &CGF, + const clang::CallExpr *E, + unsigned IntrinsicID, bool IsImageStore) { + clang::SmallVector Args; unsigned RsrcIndex = GetTextureDescIndex(E->getBuiltinCallee(), E); - 
for (unsigned I = 0; I < E->getNumArgs(); ++I){ + for (unsigned I = 0; I < E->getNumArgs(); ++I) { llvm::Value *V = CGF.EmitScalarExpr(E->getArg(I)); if (I == RsrcIndex) V = LoadTextureDescPtorAsVec8I32(CGF, V); @@ -1028,133 +1029,133 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_load_1d, false); + *this, E, Intrinsic::amdgcn_image_load_1d, false); case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_load_1darray, false); + *this, E, Intrinsic::amdgcn_image_load_1darray, false); case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_load_2d, false); + *this, E, Intrinsic::amdgcn_image_load_2d, false); case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32: case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_load_2darray, false); + *this, E, Intrinsic::amdgcn_image_load_2darray, false); case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_load_3d, false); + *this, E, Intrinsic::amdgcn_image_load_3d, false); case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_load_cube, false); + *this, E, Intrinsic::amdgcn_image_load_cube, false); case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_load_mip_1d, false); + *this, E, Intrinsic::amdgcn_image_load_mip_1d, false); case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_load_mip_1darray, false); + *this, E, Intrinsic::amdgcn_image_load_mip_1darray, false); case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32: case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_load_mip_2d, false); + *this, E, Intrinsic::amdgcn_image_load_mip_2d, false); case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32: case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_load_mip_2darray, false); + *this, E, Intrinsic::amdgcn_image_load_mip_2darray, false); case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_load_mip_3d, 
false); + *this, E, Intrinsic::amdgcn_image_load_mip_3d, false); case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_load_mip_cube, false); + *this, E, Intrinsic::amdgcn_image_load_mip_cube, false); case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_store_1d, true); + *this, E, Intrinsic::amdgcn_image_store_1d, true); case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_store_1darray, true); + *this, E, Intrinsic::amdgcn_image_store_1darray, true); case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32: case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_store_2d, true); + *this, E, Intrinsic::amdgcn_image_store_2d, true); case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32: case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_store_2darray, true); + *this, E, Intrinsic::amdgcn_image_store_2darray, true); case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_store_3d, true); + *this, E, Intrinsic::amdgcn_image_store_3d, true); case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_store_cube, true); + *this, E, Intrinsic::amdgcn_image_store_cube, true); case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_store_mip_1d, true); + *this, E, Intrinsic::amdgcn_image_store_mip_1d, true); case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_store_mip_1darray, true); + *this, E, Intrinsic::amdgcn_image_store_mip_1darray, true); case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32: case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_store_mip_2d, true); + *this, E, Intrinsic::amdgcn_image_store_mip_2d, true); case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32: case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_store_mip_2darray, true); + *this, E, Intrinsic::amdgcn_image_store_mip_2darray, true); case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32: return 
EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_store_mip_3d, true); + *this, E, Intrinsic::amdgcn_image_store_mip_3d, true); case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_store_mip_cube, true); + *this, E, Intrinsic::amdgcn_image_store_mip_cube, true); case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_sample_1d, false); + *this, E, Intrinsic::amdgcn_image_sample_1d, false); case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_sample_1darray, false); + *this, E, Intrinsic::amdgcn_image_sample_1darray, false); case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_sample_2d, false); + *this, E, Intrinsic::amdgcn_image_sample_2d, false); case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_sample_2darray, false); + *this, E, Intrinsic::amdgcn_image_sample_2darray, false); case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_sample_3d, false); + *this, E, Intrinsic::amdgcn_image_sample_3d, false); case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: return EmitAMDGCNImageOverloadedReturnType( - *this, E, Intrinsic::amdgcn_image_sample_cube, false); + *this, E, Intrinsic::amdgcn_image_sample_cube, false); case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4: case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: { llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8); diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index d08778f2d1f79..9da426917e05e 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -139,7 +139,7 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32: case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32: - case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32: @@ -154,11 +154,12 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32: case 
AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: { - StringRef FeatureList(getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID)); - if (!Builtin::evaluateRequiredTargetFeatures( - FeatureList, CallerFeatureMap)){ + StringRef FeatureList( + getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID)); + if (!Builtin::evaluateRequiredTargetFeatures(FeatureList, + CallerFeatureMap)) { Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature) - << FD->getDeclName() << FeatureList; + << FD->getDeclName() << FeatureList; return false; } @@ -166,8 +167,8 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, llvm::APSInt Result; return ((SemaRef.BuiltinConstantArg(TheCall, 0, Result)) || - (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) || - (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result))); + (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) || + (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result))); } case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32: @@ -197,11 +198,12 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32: case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32: case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: { - StringRef FeatureList(getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID)); - if (!Builtin::evaluateRequiredTargetFeatures( - FeatureList, CallerFeatureMap)){ + StringRef FeatureList( + getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID)); + if (!Builtin::evaluateRequiredTargetFeatures(FeatureList, + CallerFeatureMap)) { Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature) - << FD->getDeclName() << FeatureList; + << FD->getDeclName() << FeatureList; return false; } @@ -209,8 +211,8 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, llvm::APSInt Result; return ((SemaRef.BuiltinConstantArg(TheCall, 1, Result)) || - (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) || - (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result))); + (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) || + (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result))); } default: return false; From fb0dc56b78b64efa595855e6dc46a8d10ab12fdb Mon Sep 17 00:00:00 2001 From: ranapratap55 Date: Wed, 8 Oct 2025 14:44:33 +0530 Subject: [PATCH 8/8] Updated getTextureDescIndex() to lambda and minor changes --- clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 138 ++++++++---------- clang/lib/Sema/SemaAMDGPU.cpp | 12 +- ... builtins-image-load-param-gfx1100-err.cl} | 0 ...> builtins-image-load-param-gfx942-err.cl} | 0 ...builtins-image-store-param-gfx1100-err.cl} | 0 ... 
builtins-image-store-param-gfx942-err.cl} | 0 6 files changed, 66 insertions(+), 84 deletions(-) rename clang/test/SemaOpenCL/{builtins-image-load-param-gfx1100.cl => builtins-image-load-param-gfx1100-err.cl} (100%) rename clang/test/SemaOpenCL/{builtins-image-load-param-gfx942.cl => builtins-image-load-param-gfx942-err.cl} (100%) rename clang/test/SemaOpenCL/{builtins-image-store-param-gfx1100.cl => builtins-image-store-param-gfx1100-err.cl} (100%) rename clang/test/SemaOpenCL/{builtins-image-store-param-gfx942.cl => builtins-image-store-param-gfx942-err.cl} (100%) diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 162ca0d331e61..5049a0ab0a395 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -184,29 +184,7 @@ static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, return Call; } -static bool IsImageSampleBuiltIn(unsigned BuiltinID) { - switch (BuiltinID) { - case clang::AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32: - case clang::AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32: - case clang::AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32: - case clang::AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32: - case clang::AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32: - case clang::AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32: - case clang::AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32: - case clang::AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32: - case clang::AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32: - case clang::AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32: - case clang::AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32: - case clang::AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32: - case clang::AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32: - case clang::AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: - return true; - default: - return false; - } -} - -static llvm::Value *LoadTextureDescPtorAsVec8I32(CodeGenFunction &CGF, +static llvm::Value *loadTextureDescPtorAsVec8I32(CodeGenFunction &CGF, llvm::Value *RsrcPtr) { auto &B = CGF.Builder; auto *VecTy = llvm::FixedVectorType::get(B.getInt32Ty(), 8); @@ -215,15 +193,15 @@ static llvm::Value *LoadTextureDescPtorAsVec8I32(CodeGenFunction &CGF, return RsrcPtr; if (RsrcPtr->getType()->isIntegerTy(32)) { - unsigned AS = 8; - llvm::PointerType *VecPtrTy = llvm::PointerType::get(VecTy, AS); + llvm::PointerType *VecPtrTy = + llvm::PointerType::get(CGF.getLLVMContext(), 8); llvm::Value *Ptr = B.CreateIntToPtr(RsrcPtr, VecPtrTy, "tex.rsrc.from.int"); return B.CreateAlignedLoad(VecTy, Ptr, llvm::Align(32), "tex.rsrc.val"); } if (RsrcPtr->getType()->isPointerTy()) { - unsigned AS = RsrcPtr->getType()->getPointerAddressSpace(); - auto *VecPtrTy = llvm::PointerType::get(VecTy, AS); + auto *VecPtrTy = llvm::PointerType::get( + CGF.getLLVMContext(), RsrcPtr->getType()->getPointerAddressSpace()); llvm::Value *Typed = B.CreateBitCast(RsrcPtr, VecPtrTy, "tex.rsrc.typed"); return B.CreateAlignedLoad(VecTy, Typed, llvm::Align(32), "tex.rsrc.val"); } @@ -232,40 +210,44 @@ static llvm::Value *LoadTextureDescPtorAsVec8I32(CodeGenFunction &CGF, if (DL.getTypeSizeInBits(RsrcPtr->getType()) == 256) return B.CreateBitCast(RsrcPtr, VecTy, "tex.rsrc.val"); - RsrcPtr->getType()->print(llvm::errs()); - llvm::report_fatal_error(": Unexpected texture resource argument form"); -} - -static unsigned 
GetTextureDescIndex(unsigned BuiltinID, const CallExpr *E) {
-  unsigned N = E->getNumArgs();
-  if (IsImageSampleBuiltIn(BuiltinID)) {
-    if (N < 5)
-      return (unsigned)-1;
-    return N - 5;
-  }
-
-  if (N < 3)
-    return (unsigned)-1;
-  return N - 3;
+  llvm::report_fatal_error("Unexpected texture resource argument form");
 }
 
 llvm::CallInst *
-EmitAMDGCNImageOverloadedReturnType(clang::CodeGen::CodeGenFunction &CGF,
+emitAMDGCNImageOverloadedReturnType(clang::CodeGen::CodeGenFunction &CGF,
                                     const clang::CallExpr *E,
                                     unsigned IntrinsicID, bool IsImageStore) {
+  auto findTextureDescIndex = [&CGF](const CallExpr *E) -> unsigned {
+    QualType TexQT = CGF.getContext().AMDGPUTextureTy;
+    for (unsigned I = 0, N = E->getNumArgs(); I < N; ++I) {
+      QualType ArgTy = E->getArg(I)->getType();
+      if (ArgTy == TexQT) {
+        return I;
+      }
+
+      if (ArgTy.getCanonicalType() == TexQT.getCanonicalType()) {
+        return I;
+      }
+    }
+
+    return ~0U;
+  };
+
   clang::SmallVector Args;
-  unsigned RsrcIndex = GetTextureDescIndex(E->getBuiltinCallee(), E);
+  unsigned RsrcIndex = findTextureDescIndex(E);
+
+  if (RsrcIndex == ~0U) {
+    llvm::report_fatal_error("Invalid argument count for image builtin");
+  }
 
   for (unsigned I = 0; I < E->getNumArgs(); ++I) {
     llvm::Value *V = CGF.EmitScalarExpr(E->getArg(I));
     if (I == RsrcIndex)
-      V = LoadTextureDescPtorAsVec8I32(CGF, V);
+      V = loadTextureDescPtorAsVec8I32(CGF, V);
     Args.push_back(V);
   }
 
-  llvm::Type *RetTy = CGF.ConvertType(E->getType());
-  if (IsImageStore)
-    RetTy = CGF.VoidTy;
+  llvm::Type *RetTy = IsImageStore ? CGF.VoidTy : CGF.ConvertType(E->getType());
   llvm::CallInst *Call = CGF.Builder.CreateIntrinsic(RetTy, IntrinsicID, Args);
   return Call;
 }
@@ -1028,133 +1010,133 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   }
   case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
         *this, E, Intrinsic::amdgcn_image_load_1d, false);
   case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
         *this, E, Intrinsic::amdgcn_image_load_1darray, false);
   case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_2d, false);
   case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_2darray, false);
   case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_3d, false);
   case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_cube, false);
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_1d, false);
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_1darray, false);
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_2d, false);
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_2darray, false);
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_3d, false);
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_cube, false);
   case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_1d, true);
   case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_1darray, true);
   case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_2d, true);
   case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_2darray, true);
   case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_3d, true);
   case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_cube, true);
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_1d, true);
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_1darray, true);
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_2d, true);
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_2darray, true);
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_3d, true);
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_cube, true);
   case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_1d, false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_1darray, false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_2d, false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_2darray, false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_3d, false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
-    return EmitAMDGCNImageOverloadedReturnType(
+    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_cube, false);
   case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
   case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index 9da426917e05e..45fe80de53fbc 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -166,9 +166,9 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
     unsigned ArgCount = TheCall->getNumArgs() - 1;
     llvm::APSInt Result;
 
-    return ((SemaRef.BuiltinConstantArg(TheCall, 0, Result)) ||
-            (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) ||
-            (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)));
+    return (SemaRef.BuiltinConstantArg(TheCall, 0, Result)) ||
+           (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) ||
+           (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result));
   }
   case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
@@ -210,9 +210,9 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
     unsigned ArgCount = TheCall->getNumArgs() - 1;
     llvm::APSInt Result;
 
-    return ((SemaRef.BuiltinConstantArg(TheCall, 1, Result)) ||
-            (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) ||
-            (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)));
+    return (SemaRef.BuiltinConstantArg(TheCall, 1, Result)) ||
+           (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) ||
+           (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result));
   }
   default:
     return false;
diff --git a/clang/test/SemaOpenCL/builtins-image-load-param-gfx1100.cl b/clang/test/SemaOpenCL/builtins-image-load-param-gfx1100-err.cl
similarity index 100%
rename from clang/test/SemaOpenCL/builtins-image-load-param-gfx1100.cl
rename to clang/test/SemaOpenCL/builtins-image-load-param-gfx1100-err.cl
diff --git a/clang/test/SemaOpenCL/builtins-image-load-param-gfx942.cl b/clang/test/SemaOpenCL/builtins-image-load-param-gfx942-err.cl
similarity index 100%
rename from clang/test/SemaOpenCL/builtins-image-load-param-gfx942.cl
rename to clang/test/SemaOpenCL/builtins-image-load-param-gfx942-err.cl
diff --git a/clang/test/SemaOpenCL/builtins-image-store-param-gfx1100.cl b/clang/test/SemaOpenCL/builtins-image-store-param-gfx1100-err.cl
similarity index 100%
rename from clang/test/SemaOpenCL/builtins-image-store-param-gfx1100.cl
rename to clang/test/SemaOpenCL/builtins-image-store-param-gfx1100-err.cl
diff --git a/clang/test/SemaOpenCL/builtins-image-store-param-gfx942.cl b/clang/test/SemaOpenCL/builtins-image-store-param-gfx942-err.cl
similarity index 100%
rename from clang/test/SemaOpenCL/builtins-image-store-param-gfx942.cl
rename to clang/test/SemaOpenCL/builtins-image-store-param-gfx942-err.cl
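
Note for reviewers (not part of the patch): a minimal, hypothetical C sketch of how one of the reworked builtins might be called once the texture-typed argument is in place. The __amdgpu_texture_t spelling and the exact operand order (dmask, coordinates, texture handle, texfailctrl, cachepolicy) are assumptions; the SemaAMDGPU checks above only require the first operand and the last two operands of a load to be integer constant expressions.

// Hypothetical usage sketch; names and operand order are assumptions.
typedef float float4 __attribute__((ext_vector_type(4)));

float4 load_texel(__amdgpu_texture_t tex, int x, int y) {
  // dmask = 0xf selects all four channels; texfailctrl and cachepolicy are 0.
  // Per the Sema checks, these three operands must be constant expressions;
  // the texture operand is located by its type, not by a fixed position.
  return __builtin_amdgcn_image_load_2d_v4f32_i32(0xf, x, y, tex, 0, 0);
}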