|
11 | 11 | //===----------------------------------------------------------------------===// |
12 | 12 |
|
13 | 13 | #include "CGBuiltin.h" |
| 14 | +#include "CodeGenFunction.h" |
14 | 15 | #include "clang/Basic/TargetBuiltins.h" |
| 16 | +#include "clang/Frontend/FrontendDiagnostic.h" |
15 | 17 | #include "llvm/Analysis/ValueTracking.h" |
| 18 | +#include "llvm/CodeGen/MachineFunction.h" |
16 | 19 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
17 | 20 | #include "llvm/IR/IntrinsicsR600.h" |
18 | 21 | #include "llvm/IR/MemoryModelRelaxationAnnotations.h" |
@@ -181,6 +184,74 @@ static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, |
181 | 184 | return Call; |
182 | 185 | } |
183 | 186 |
|
// Materialize a texture/image resource descriptor as the <8 x i32> vector
// form that the amdgcn image intrinsics expect. Accepts the descriptor in
// any of the shapes frontends hand us: already a <8 x i32>, a 32-bit handle,
// a pointer to the descriptor, or some other 256-bit value.
static llvm::Value *loadTextureDescPtorAsVec8I32(CodeGenFunction &CGF,
                                                 llvm::Value *RsrcPtr) {
  auto &B = CGF.Builder;
  auto *VecTy = llvm::FixedVectorType::get(B.getInt32Ty(), 8);

  // Already in the required <8 x i32> form: pass through unchanged.
  if (RsrcPtr->getType() == VecTy)
    return RsrcPtr;

  if (RsrcPtr->getType()->isIntegerTy(32)) {
    // A 32-bit integer handle: reinterpret it as a pointer in address
    // space 8 and load the 256-bit descriptor through it with 32-byte
    // alignment.
    // NOTE(review): assumes the i32 is a valid addrspace(8) pointer value
    // (AMDGPU buffer-resource address space) — confirm with callers.
    llvm::PointerType *VecPtrTy =
        llvm::PointerType::get(CGF.getLLVMContext(), 8);
    llvm::Value *Ptr = B.CreateIntToPtr(RsrcPtr, VecPtrTy, "tex.rsrc.from.int");
    return B.CreateAlignedLoad(VecTy, Ptr, llvm::Align(32), "tex.rsrc.val");
  }

  if (RsrcPtr->getType()->isPointerTy()) {
    // A pointer to the descriptor: load the value through a pointer in the
    // same address space. (With opaque pointers the bitcast folds to a
    // no-op; it is harmless either way.)
    auto *VecPtrTy = llvm::PointerType::get(
        CGF.getLLVMContext(), RsrcPtr->getType()->getPointerAddressSpace());
    llvm::Value *Typed = B.CreateBitCast(RsrcPtr, VecPtrTy, "tex.rsrc.typed");
    return B.CreateAlignedLoad(VecTy, Typed, llvm::Align(32), "tex.rsrc.val");
  }

  // Any other 256-bit first-class value (e.g. a differently-typed vector)
  // can be reinterpreted in place.
  const auto &DL = CGF.CGM.getDataLayout();
  if (DL.getTypeSizeInBits(RsrcPtr->getType()) == 256)
    return B.CreateBitCast(RsrcPtr, VecTy, "tex.rsrc.val");

  llvm::report_fatal_error("Unexpected texture resource argument form");
}
| 215 | + |
| 216 | +llvm::CallInst * |
| 217 | +emitAMDGCNImageOverloadedReturnType(clang::CodeGen::CodeGenFunction &CGF, |
| 218 | + const clang::CallExpr *E, |
| 219 | + unsigned IntrinsicID, bool IsImageStore) { |
| 220 | + auto findTextureDescIndex = [&CGF](const CallExpr *E) -> unsigned { |
| 221 | + QualType TexQT = CGF.getContext().AMDGPUTextureTy; |
| 222 | + for (unsigned I = 0, N = E->getNumArgs(); I < N; ++I) { |
| 223 | + QualType ArgTy = E->getArg(I)->getType(); |
| 224 | + if (ArgTy == TexQT) { |
| 225 | + return I; |
| 226 | + } |
| 227 | + |
| 228 | + if (ArgTy.getCanonicalType() == TexQT.getCanonicalType()) { |
| 229 | + return I; |
| 230 | + } |
| 231 | + } |
| 232 | + |
| 233 | + return ~0U; |
| 234 | + }; |
| 235 | + |
| 236 | + clang::SmallVector<llvm::Value *, 10> Args; |
| 237 | + unsigned RsrcIndex = findTextureDescIndex(E); |
| 238 | + |
| 239 | + if (RsrcIndex == ~0U) { |
| 240 | + llvm::report_fatal_error("Invalid argument count for image builtin"); |
| 241 | + } |
| 242 | + |
| 243 | + for (unsigned I = 0; I < E->getNumArgs(); ++I) { |
| 244 | + llvm::Value *V = CGF.EmitScalarExpr(E->getArg(I)); |
| 245 | + if (I == RsrcIndex) |
| 246 | + V = loadTextureDescPtorAsVec8I32(CGF, V); |
| 247 | + Args.push_back(V); |
| 248 | + } |
| 249 | + |
| 250 | + llvm::Type *RetTy = IsImageStore ? CGF.VoidTy : CGF.ConvertType(E->getType()); |
| 251 | + llvm::CallInst *Call = CGF.Builder.CreateIntrinsic(RetTy, IntrinsicID, Args); |
| 252 | + return Call; |
| 253 | +} |
| 254 | + |
184 | 255 | // Emit an intrinsic that has 1 float or double operand, and 1 integer. |
185 | 256 | static Value *emitFPIntBuiltin(CodeGenFunction &CGF, |
186 | 257 | const CallExpr *E, |
@@ -937,6 +1008,136 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, |
937 | 1008 |
|
938 | 1009 | return Builder.CreateInsertElement(I0, A, 1); |
939 | 1010 | } |
  // amdgcn image builtins. Each group of builtins (one builtin per result
  // type, e.g. _f32 / _v4f32 / _v4f16) lowers to a single overloaded image
  // intrinsic; emitAMDGCNImageOverloadedReturnType resolves the result-type
  // overload from the builtin's return type and converts the texture
  // descriptor argument to <8 x i32>. The final boolean flag marks image
  // stores, which return void.

  // --- Image loads (optionally with an explicit mip level) ---
  case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_1d, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_1darray, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_2d, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_2darray, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_3d, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_cube, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_1d, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_1darray, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_2d, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_2darray, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_3d, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_cube, false);

  // --- Image stores (void result; IsImageStore = true) ---
  case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_1d, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_1darray, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_2d, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_2darray, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_3d, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_cube, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_1d, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_1darray, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_2d, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_2darray, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_3d, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_cube, true);

  // --- Image samples (float coordinates rather than integer texel indices) ---
  case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_1d, false);
  case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_1darray, false);
  case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_2d, false);
  case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_2darray, false);
  case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_3d, false);
  case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
    return emitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_cube, false);
940 | 1141 | case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4: |
941 | 1142 | case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: { |
942 | 1143 | llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8); |
|
0 commit comments