66target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
77target triple = "spir64-unknown-unknown-sycldevice"
88
9- %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" = type { <16 x i32 > }
9+ %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" = type { <16 x i32 > }
1010
1111$"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test" = comdat any
1212
1313; CHECK: [[NEWGLOBAL:[@a-zA-Z0-9_]*]] = dso_local global <16 x i32> zeroinitializer, align 64 #0
14- @0 = dso_local global %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" zeroinitializer , align 64 #0
14+ @0 = dso_local global %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" zeroinitializer , align 64 #0
1515
1616; Function Attrs: norecurse
1717define weak_odr dso_local spir_kernel void @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test" (i32 addrspace (1 )* %_arg_ ) local_unnamed_addr #1 comdat !kernel_arg_addr_space !8 !kernel_arg_access_qual !9 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !11 !sycl_explicit_simd !12 !intel_reqd_sub_group_size !8 {
1818entry:
19- %vc.i = alloca %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , align 64
20- %agg.tmp.i = alloca %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , align 64
19+ %vc.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , align 64
20+ %agg.tmp.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , align 64
2121 %call.esimd.i.i.i.i.i = call <3 x i32 > @llvm.genx.local.id.v3i32 () #5
2222 %local_id.y.i.i.i.i.i = extractelement <3 x i32 > %call.esimd.i.i.i.i.i , i32 1
2323 %local_id.y.cast.ty.i.i.i.i.i = zext i32 %local_id.y.i.i.i.i.i to i64
@@ -36,15 +36,15 @@ entry:
3636 %group.id.x.cast.ty.i.i.i.i.i = zext i32 %group.id.x.i.i.i.i.i to i64
3737 %mul.i4.i.i.i.i = mul nuw i64 %group.id.x.cast.ty.i.i.i.i.i , %wgsize.x.cast.ty.i.i.i.i.i
3838 %add.i5.i.i.i.i = add i64 %mul.i4.i.i.i.i , %local_id.x.cast.ty.i.i.i.i.i
39- %0 = bitcast %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * %agg.tmp.i to i8*
39+ %0 = bitcast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * %agg.tmp.i to i8*
4040 call void @llvm.lifetime.start.p0i8 (i64 64 , i8* nonnull %0 )
41- %1 = bitcast %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * %vc.i to i8*
41+ %1 = bitcast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * %vc.i to i8*
4242 call void @llvm.lifetime.start.p0i8 (i64 64 , i8* nonnull %1 ) #5
4343 %conv.i = trunc i64 %add.i5.i.i.i.i to i32
44- %2 = addrspacecast %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * %vc.i to %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )*
44+ %2 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * %vc.i to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )*
4545 %splat.splatinsert.i.i = insertelement <16 x i32 > undef , i32 %conv.i , i32 0
4646 %splat.splat.i.i = shufflevector <16 x i32 > %splat.splatinsert.i.i , <16 x i32 > undef , <16 x i32 > zeroinitializer
47- %M_data.i13.i = getelementptr inbounds %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )* %2 , i64 0 , i32 0
47+ %M_data.i13.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )* %2 , i64 0 , i32 0
4848 store <16 x i32 > %splat.splat.i.i , <16 x i32 > addrspace (4 )* %M_data.i13.i , align 64 , !tbaa !13
4949 %conv3.i = trunc i64 %add.i.i.i.i.i to i32
5050 %splat.splatinsert.i20.i = insertelement <8 x i32 > undef , i32 %conv3.i , i32 0
@@ -56,17 +56,17 @@ entry:
5656 %..i = select i1 %cmp.i , i64 %add.i5.i.i.i.i , i64 %add.i.i.i.i.i
5757 %conv9.i = trunc i64 %..i to i32
5858; CHECK: store <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds ({{.+}}, {{.+}}* bitcast (<16 x i32>* [[NEWGLOBAL]] to {{.+}}*), i64 0, i32 0) to <16 x i32> addrspace(4)*), align 64, !tbaa.struct !16
59- store <16 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 >, <16 x i32 > addrspace (4 )* addrspacecast (<16 x i32 >* getelementptr inbounds (%"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * @0 , i64 0 , i32 0 ) to <16 x i32 > addrspace (4 )*), align 64 , !tbaa.struct !16
59+ store <16 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 >, <16 x i32 > addrspace (4 )* addrspacecast (<16 x i32 >* getelementptr inbounds (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * @0 , i64 0 , i32 0 ) to <16 x i32 > addrspace (4 )*), align 64 , !tbaa.struct !16
6060 %mul.i = shl nsw i32 %conv9.i , 4
6161 %idx.ext.i = sext i32 %mul.i to i64
6262 %add.ptr.i16 = getelementptr inbounds i32 , i32 addrspace (1 )* %_arg_ , i64 %idx.ext.i
6363 %add.ptr.i = addrspacecast i32 addrspace (1 )* %add.ptr.i16 to i32 addrspace (4 )*
64- %3 = addrspacecast %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * %agg.tmp.i to %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )*
64+ %3 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * %agg.tmp.i to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )*
6565 %call.esimd.i.i.i = call <16 x i32 > @llvm.genx.vload.v16i32.p4v16i32 (<16 x i32 > addrspace (4 )* %M_data.i13.i ) #5
66- %M_data.i2.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )* %3 , i64 0 , i32 0
66+ %M_data.i2.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )* %3 , i64 0 , i32 0
6767 call void @llvm.genx.vstore.v16i32.p4v16i32 (<16 x i32 > %call.esimd.i.i.i , <16 x i32 > addrspace (4 )* %M_data.i2.i.i ) #5
68- call spir_func void @_Z3fooPiN2cl4sycl5intel3gpu4simdIiLi16EEE (i32 addrspace (4 )* %add.ptr.i , %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * nonnull %agg.tmp.i ) #5
69- store <16 x i32 > <i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 >, <16 x i32 > addrspace (4 )* addrspacecast (<16 x i32 >* getelementptr inbounds (%"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * @0 , i64 0 , i32 0 ) to <16 x i32 > addrspace (4 )*), align 64 , !tbaa.struct !16
68+ call spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE (i32 addrspace (4 )* %add.ptr.i , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * nonnull %agg.tmp.i ) #5
69+ store <16 x i32 > <i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 >, <16 x i32 > addrspace (4 )* addrspacecast (<16 x i32 >* getelementptr inbounds (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * @0 , i64 0 , i32 0 ) to <16 x i32 > addrspace (4 )*), align 64 , !tbaa.struct !16
7070 call void @llvm.lifetime.end.p0i8 (i64 64 , i8* nonnull %1 ) #5
7171 call void @llvm.lifetime.end.p0i8 (i64 64 , i8* nonnull %0 )
7272 ret void
@@ -79,17 +79,17 @@ declare void @llvm.lifetime.start.p0i8(i64 immarg %0, i8* nocapture %1) #2
7979declare void @llvm.lifetime.end.p0i8 (i64 immarg %0 , i8* nocapture %1 ) #2
8080
8181; Function Attrs: noinline norecurse nounwind
82- define dso_local spir_func void @_Z3fooPiN2cl4sycl5intel3gpu4simdIiLi16EEE (i32 addrspace (4 )* %C , %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * %v ) local_unnamed_addr #3 !sycl_explicit_simd !12 {
82+ define dso_local spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE (i32 addrspace (4 )* %C , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * %v ) local_unnamed_addr #3 !sycl_explicit_simd !12 {
8383entry:
84- %agg.tmp = alloca %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , align 64
85- %0 = addrspacecast %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * %v to %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )*
86- %1 = addrspacecast %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * %agg.tmp to %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )*
87- %M_data.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )* %0 , i64 0 , i32 0
84+ %agg.tmp = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , align 64
85+ %0 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * %v to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )*
86+ %1 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * %agg.tmp to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )*
87+ %M_data.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )* %0 , i64 0 , i32 0
8888 %call.esimd.i.i = call <16 x i32 > @llvm.genx.vload.v16i32.p4v16i32 (<16 x i32 > addrspace (4 )* %M_data.i.i ), !noalias !17
8989; CHECK: {{.+}} = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* getelementptr ({{.+}}, {{.+}} addrspace(4)* addrspacecast ({{.+}}* bitcast (<16 x i32>* [[NEWGLOBAL]] to {{.+}}*) to {{.+}} addrspace(4)*), i64 0, i32 0)), !noalias !17
90- %call.esimd.i8.i = call <16 x i32 > @llvm.genx.vload.v16i32.p4v16i32 (<16 x i32 > addrspace (4 )* getelementptr (%"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )* addrspacecast (%"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * @0 to %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )*), i64 0 , i32 0 )), !noalias !17
90+ %call.esimd.i8.i = call <16 x i32 > @llvm.genx.vload.v16i32.p4v16i32 (<16 x i32 > addrspace (4 )* getelementptr (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )* addrspacecast (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * @0 to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )*), i64 0 , i32 0 )), !noalias !17
9191 %add.i = add <16 x i32 > %call.esimd.i8.i , %call.esimd.i.i
92- %M_data.i.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )* %1 , i64 0 , i32 0
92+ %M_data.i.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )* %1 , i64 0 , i32 0
9393 call void @llvm.genx.vstore.v16i32.p4v16i32 (<16 x i32 > %add.i , <16 x i32 > addrspace (4 )* %M_data.i.i.i )
9494 %2 = ptrtoint i32 addrspace (4 )* %C to i64
9595 %call.esimd.i.i2 = call <16 x i32 > @llvm.genx.vload.v16i32.p4v16i32 (<16 x i32 > addrspace (4 )* %M_data.i.i.i )
@@ -153,8 +153,8 @@ attributes #5 = { nounwind }
153153!15 = !{!"Simple C++ TBAA" }
154154!16 = !{i64 0 , i64 64 , !13 }
155155!17 = !{!18 }
156- !18 = distinct !{!18 , !19 , !"_ZNK2cl4sycl5intel3gpu4simdIiLi16EEplERKS4_ : %agg.result" }
157- !19 = distinct !{!19 , !"_ZNK2cl4sycl5intel3gpu4simdIiLi16EEplERKS4_ " }
156+ !18 = distinct !{!18 , !19 , !"_ZNK2cl4sycl5INTEL3gpu4simdIiLi16EEplERKS4_ : %agg.result" }
157+ !19 = distinct !{!19 , !"_ZNK2cl4sycl5INTEL3gpu4simdIiLi16EEplERKS4_ " }
158158!20 = !{i32 8275 }
159159!21 = !{i32 8268 }
160160!22 = !{i32 8269 }
0 commit comments