|
4 | 4 | // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \ |
5 | 5 | // RUN: -mcode-object-version=5 -DUSER -x hip -o %t_5.bc %s |
6 | 6 |
|
| 7 | +// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \ |
| 8 | +// RUN: -mcode-object-version=6 -DUSER -x hip -o %t_6.bc %s |
| 9 | + |
7 | 10 | // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \ |
8 | 11 | // RUN: -mcode-object-version=none -DDEVICELIB -x hip -o %t_0.bc %s |
9 | 12 |
|
|
15 | 18 | // RUN: %t_5.bc -mlink-builtin-bitcode %t_0.bc -o - |\ |
16 | 19 | // RUN: FileCheck -check-prefix=LINKED5 %s |
17 | 20 |
|
| 21 | +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \ |
| 22 | +// RUN: %t_6.bc -mlink-builtin-bitcode %t_0.bc -o - |\ |
| 23 | +// RUN: FileCheck -check-prefix=LINKED6 %s |
| 24 | + |
18 | 25 | #include "Inputs/cuda.h" |
19 | 26 |
|
20 | 27 | // LINKED4: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400 |
|
77 | 84 | // LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef |
78 | 85 | // LINKED5: "amdgpu_code_object_version", i32 500 |
79 | 86 |
|
| 87 | +// LINKED6: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
| 88 | +// LINKED6-LABEL: bar |
| 89 | +// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} |
| 90 | +// LINKED6-NOT: icmp sge i32 %{{.*}}, 500 |
| 91 | +// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| 92 | +// LINKED6: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12 |
| 93 | +// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| 94 | +// LINKED6: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4 |
| 95 | +// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]] |
| 96 | +// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef |
| 97 | + |
| 98 | +// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} |
| 99 | +// LINKED6-NOT: icmp sge i32 %{{.*}}, 500 |
| 100 | +// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| 101 | +// LINKED6: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14 |
| 102 | +// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| 103 | +// LINKED6: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6 |
| 104 | +// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]] |
| 105 | +// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef |
| 106 | + |
| 107 | +// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} |
| 108 | +// LINKED6-NOT: icmp sge i32 %{{.*}}, 500 |
| 109 | +// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| 110 | +// LINKED6: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16 |
| 111 | +// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| 112 | +// LINKED6: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8 |
| 113 | +// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]] |
| 114 | +// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef |
| 115 | +// LINKED6: "amdgpu_code_object_version", i32 600 |
| 116 | + |
80 | 117 | #ifdef DEVICELIB |
81 | 118 | __device__ void bar(int *x, int *y, int *z) |
82 | 119 | { |
|
0 commit comments