1+ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
12; RUN: opt < %s -passes=sroa -S | FileCheck %s
23target datalayout = "e-p:64:64:64-p1:16:16:16-p3:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
34
@@ -10,9 +11,11 @@ declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace
1011; Make sure an illegal bitcast isn't introduced
1112define void @test_address_space_1_1 (<2 x i64 > addrspace (1 )* %a , i16 addrspace (1 )* %b ) {
1213; CHECK-LABEL: @test_address_space_1_1(
13- ; CHECK: load <2 x i64>, <2 x i64> addrspace(1)* %a, align 2
14- ; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2
15- ; CHECK: ret void
14+ ; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, <2 x i64> addrspace(1)* [[A:%.*]], align 2
15+ ; CHECK-NEXT: [[AA_0_BPTR_SROA_CAST:%.*]] = bitcast i16 addrspace(1)* [[B:%.*]] to <2 x i64> addrspace(1)*
16+ ; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], <2 x i64> addrspace(1)* [[AA_0_BPTR_SROA_CAST]], align 2
17+ ; CHECK-NEXT: ret void
18+ ;
1619 %aa = alloca <2 x i64 >, align 16
1720 %aptr = bitcast <2 x i64 > addrspace (1 )* %a to i8 addrspace (1 )*
1821 %aaptr = bitcast <2 x i64 >* %aa to i8*
@@ -24,9 +27,11 @@ define void @test_address_space_1_1(<2 x i64> addrspace(1)* %a, i16 addrspace(1)
2427
2528define void @test_address_space_1_0 (<2 x i64 > addrspace (1 )* %a , i16* %b ) {
2629; CHECK-LABEL: @test_address_space_1_0(
27- ; CHECK: load <2 x i64>, <2 x i64> addrspace(1)* %a, align 2
28- ; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2
29- ; CHECK: ret void
30+ ; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, <2 x i64> addrspace(1)* [[A:%.*]], align 2
31+ ; CHECK-NEXT: [[AA_0_BPTR_SROA_CAST:%.*]] = bitcast i16* [[B:%.*]] to <2 x i64>*
32+ ; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], <2 x i64>* [[AA_0_BPTR_SROA_CAST]], align 2
33+ ; CHECK-NEXT: ret void
34+ ;
3035 %aa = alloca <2 x i64 >, align 16
3136 %aptr = bitcast <2 x i64 > addrspace (1 )* %a to i8 addrspace (1 )*
3237 %aaptr = bitcast <2 x i64 >* %aa to i8*
@@ -38,9 +43,11 @@ define void @test_address_space_1_0(<2 x i64> addrspace(1)* %a, i16* %b) {
3843
3944define void @test_address_space_0_1 (<2 x i64 >* %a , i16 addrspace (1 )* %b ) {
4045; CHECK-LABEL: @test_address_space_0_1(
41- ; CHECK: load <2 x i64>, <2 x i64>* %a, align 2
42- ; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2
43- ; CHECK: ret void
46+ ; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, <2 x i64>* [[A:%.*]], align 2
47+ ; CHECK-NEXT: [[AA_0_BPTR_SROA_CAST:%.*]] = bitcast i16 addrspace(1)* [[B:%.*]] to <2 x i64> addrspace(1)*
48+ ; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], <2 x i64> addrspace(1)* [[AA_0_BPTR_SROA_CAST]], align 2
49+ ; CHECK-NEXT: ret void
50+ ;
4451 %aa = alloca <2 x i64 >, align 16
4552 %aptr = bitcast <2 x i64 >* %a to i8*
4653 %aaptr = bitcast <2 x i64 >* %aa to i8*
@@ -54,7 +61,23 @@ define void @test_address_space_0_1(<2 x i64>* %a, i16 addrspace(1)* %b) {
5461
5562define void @copy_struct ([5 x i64 ] %in.coerce , i8 addrspace (1 )* align 4 %ptr ) {
5663; CHECK-LABEL: @copy_struct(
57- ; CHECK-NOT: memcpy
64+ ; CHECK-NEXT: for.end:
65+ ; CHECK-NEXT: [[IN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [5 x i64] [[IN_COERCE:%.*]], 0
66+ ; CHECK-NEXT: [[IN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [5 x i64] [[IN_COERCE]], 1
67+ ; CHECK-NEXT: [[IN_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [5 x i64] [[IN_COERCE]], 2
68+ ; CHECK-NEXT: [[IN_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [5 x i64] [[IN_COERCE]], 3
69+ ; CHECK-NEXT: [[IN_SROA_2_4_PTR_SROA_CAST:%.*]] = bitcast i8 addrspace(1)* [[PTR:%.*]] to i32 addrspace(1)*
70+ ; CHECK-NEXT: [[IN_SROA_2_4_EXTRACT_SHIFT:%.*]] = lshr i64 [[IN_COERCE_FCA_2_EXTRACT]], 32
71+ ; CHECK-NEXT: [[IN_SROA_2_4_EXTRACT_TRUNC:%.*]] = trunc i64 [[IN_SROA_2_4_EXTRACT_SHIFT]] to i32
72+ ; CHECK-NEXT: store i32 [[IN_SROA_2_4_EXTRACT_TRUNC]], i32 addrspace(1)* [[IN_SROA_2_4_PTR_SROA_CAST]], align 4
73+ ; CHECK-NEXT: [[IN_SROA_4_20_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[PTR]], i16 4
74+ ; CHECK-NEXT: [[IN_SROA_4_20_PTR_SROA_CAST:%.*]] = bitcast i8 addrspace(1)* [[IN_SROA_4_20_PTR_SROA_IDX]] to i64 addrspace(1)*
75+ ; CHECK-NEXT: store i64 [[IN_COERCE_FCA_3_EXTRACT]], i64 addrspace(1)* [[IN_SROA_4_20_PTR_SROA_CAST]], align 4
76+ ; CHECK-NEXT: [[IN_SROA_5_20_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[PTR]], i16 12
77+ ; CHECK-NEXT: [[IN_SROA_5_20_PTR_SROA_CAST:%.*]] = bitcast i8 addrspace(1)* [[IN_SROA_5_20_PTR_SROA_IDX]] to i32 addrspace(1)*
78+ ; CHECK-NEXT: store i32 undef, i32 addrspace(1)* [[IN_SROA_5_20_PTR_SROA_CAST]], align 4
79+ ; CHECK-NEXT: ret void
80+ ;
5881for.end:
5982 %in = alloca %struct.struct_test_27.0.13 , align 8
6083 %0 = bitcast %struct.struct_test_27.0.13* %in to [5 x i64 ]*
@@ -64,7 +87,7 @@ for.end:
6487 call void @llvm.memcpy.p1i8.p0i8.i32 (i8 addrspace (1 )* align 4 %ptr , i8* align 4 %scevgep910 , i32 16 , i1 false )
6588 ret void
6689}
67-
90+
6891%union.anon = type { i32* }
6992
7093@g = common global i32 0 , align 4
@@ -74,8 +97,12 @@ for.end:
7497; illegal bitcast isn't introduced
7598define void @pr27557 () {
7699; CHECK-LABEL: @pr27557(
77- ; CHECK: %[[CAST:.*]] = bitcast i32** {{.*}} to i32 addrspace(3)**
78- ; CHECK: store i32 addrspace(3)* @l, i32 addrspace(3)** %[[CAST]]
100+ ; CHECK-NEXT: [[DOTSROA_0:%.*]] = alloca i32*, align 8
101+ ; CHECK-NEXT: store i32* @g, i32** [[DOTSROA_0]], align 8
102+ ; CHECK-NEXT: [[DOTSROA_0_0__SROA_CAST1:%.*]] = bitcast i32** [[DOTSROA_0]] to i32 addrspace(3)**
103+ ; CHECK-NEXT: store i32 addrspace(3)* @l, i32 addrspace(3)** [[DOTSROA_0_0__SROA_CAST1]], align 8
104+ ; CHECK-NEXT: ret void
105+ ;
79106 %1 = alloca %union.anon , align 8
80107 %2 = bitcast %union.anon* %1 to i32**
81108 store i32* @g , i32** %2 , align 8
@@ -90,7 +117,8 @@ define void @pr27557() {
90117; should be promoted through the pair of `ptrtoint`/`inttoptr`.
91118define i32* @pr27557.alt () {
92119; CHECK-LABEL: @pr27557.alt(
93- ; CHECK: ret i32* inttoptr (i64 ptrtoint (i32 addrspace(2)* @l2 to i64) to i32*)
120+ ; CHECK-NEXT: ret i32* inttoptr (i64 ptrtoint (i32 addrspace(2)* @l2 to i64) to i32*)
121+ ;
94122 %1 = alloca %union.anon , align 8
95123 %2 = bitcast %union.anon* %1 to i32 addrspace (2 )**
96124 store i32 addrspace (2 )* @l2 , i32 addrspace (2 )** %2 , align 8
@@ -101,30 +129,52 @@ define i32* @pr27557.alt() {
101129
102130; Make sure pre-splitting doesn't try to introduce an illegal bitcast
; @presplit loads one i64 through the addrspace(1) pointer %p, stores it into
; a stack i64 slot (%b), then reads that slot back as two floats through
; [2 x float] GEPs and returns their sum.
;
; The autogenerated check lines below expect the SROA output to contain no
; alloca: the i64 load is replaced by two i32 loads taken directly from %p,
; each bitcast to float before the fadd. Every pointer cast in the expected
; output stays within addrspace(1). The second load is reached through an i8
; GEP at byte offset 4 whose index is i16; the file's target datalayout
; declares p1 pointers as 16 bits wide.
103131define float @presplit (i64 addrspace (1 )* %p ) {
104- entry:
105132; CHECK-LABEL: @presplit(
106- ; CHECK: %[[CAST:.*]] = bitcast i64 addrspace(1)* {{.*}} to i32 addrspace(1)*
107- ; CHECK: load i32, i32 addrspace(1)* %[[CAST]]
108- %b = alloca i64
109- %b.cast = bitcast i64* %b to [2 x float ]*
110- %b.gep1 = getelementptr [2 x float ], [2 x float ]* %b.cast , i32 0 , i32 0
111- %b.gep2 = getelementptr [2 x float ], [2 x float ]* %b.cast , i32 0 , i32 1
112- %l = load i64 , i64 addrspace (1 )* %p
113- store i64 %l , i64* %b
114- %f1 = load float , float * %b.gep1
115- %f2 = load float , float * %b.gep2
116- %ret = fadd float %f1 , %f2
117- ret float %ret
133+ ; CHECK-NEXT: entry:
134+ ; CHECK-NEXT: [[P_SROA_CAST:%.*]] = bitcast i64 addrspace(1)* [[P:%.*]] to i32 addrspace(1)*
135+ ; CHECK-NEXT: [[L1:%.*]] = load i32, i32 addrspace(1)* [[P_SROA_CAST]], align 4
136+ ; CHECK-NEXT: [[P_SROA_RAW_CAST:%.*]] = bitcast i64 addrspace(1)* [[P]] to i8 addrspace(1)*
137+ ; CHECK-NEXT: [[P_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[P_SROA_RAW_CAST]], i16 4
138+ ; CHECK-NEXT: [[P_SROA_CAST2:%.*]] = bitcast i8 addrspace(1)* [[P_SROA_RAW_IDX]] to i32 addrspace(1)*
139+ ; CHECK-NEXT: [[L3:%.*]] = load i32, i32 addrspace(1)* [[P_SROA_CAST2]], align 4
140+ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[L1]] to float
141+ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[L3]] to float
142+ ; CHECK-NEXT: [[RET:%.*]] = fadd float [[TMP0]], [[TMP1]]
143+ ; CHECK-NEXT: ret float [[RET]]
144+ ;
; Input IR: %b is an 8-byte stack slot viewed as [2 x float]; %b.gep1 and
; %b.gep2 address its two float elements.
145+ entry:
146+ %b = alloca i64
147+ %b.cast = bitcast i64* %b to [2 x float ]*
148+ %b.gep1 = getelementptr [2 x float ], [2 x float ]* %b.cast , i32 0 , i32 0
149+ %b.gep2 = getelementptr [2 x float ], [2 x float ]* %b.cast , i32 0 , i32 1
; Whole-slot i64 copy from addrspace(1) into the slot; the check lines above
; expect this load to be split into two i32 loads.
150+ %l = load i64 , i64 addrspace (1 )* %p
151+ store i64 %l , i64* %b
152+ %f1 = load float , float * %b.gep1
153+ %f2 = load float , float * %b.gep2
154+ %ret = fadd float %f1 , %f2
155+ ret float %ret
118156}
119157
120158; Test load from and store to non-zero address space.
121159define void @test_load_store_diff_addr_space ([2 x float ] addrspace (1 )* %complex1 , [2 x float ] addrspace (1 )* %complex2 ) {
122- ; CHECK-LABEL: @test_load_store_diff_addr_space
123- ; CHECK-NOT: alloca
124- ; CHECK: load i32, i32 addrspace(1)*
125- ; CHECK: load i32, i32 addrspace(1)*
126- ; CHECK: store i32 %{{.*}}, i32 addrspace(1)*
127- ; CHECK: store i32 %{{.*}}, i32 addrspace(1)*
160+ ; CHECK-LABEL: @test_load_store_diff_addr_space(
161+ ; CHECK-NEXT: [[P1_SROA_CAST:%.*]] = bitcast [2 x float] addrspace(1)* [[COMPLEX1:%.*]] to i32 addrspace(1)*
162+ ; CHECK-NEXT: [[V15:%.*]] = load i32, i32 addrspace(1)* [[P1_SROA_CAST]], align 4
163+ ; CHECK-NEXT: [[P1_SROA_IDX:%.*]] = getelementptr inbounds [2 x float], [2 x float] addrspace(1)* [[COMPLEX1]], i16 0, i16 1
164+ ; CHECK-NEXT: [[P1_SROA_CAST7:%.*]] = bitcast float addrspace(1)* [[P1_SROA_IDX]] to i32 addrspace(1)*
165+ ; CHECK-NEXT: [[V18:%.*]] = load i32, i32 addrspace(1)* [[P1_SROA_CAST7]], align 4
166+ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[V15]] to float
167+ ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[V18]] to float
168+ ; CHECK-NEXT: [[SUM:%.*]] = fadd float [[TMP1]], [[TMP2]]
169+ ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[SUM]] to i32
170+ ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[SUM]] to i32
171+ ; CHECK-NEXT: [[P2_SROA_CAST:%.*]] = bitcast [2 x float] addrspace(1)* [[COMPLEX2:%.*]] to i32 addrspace(1)*
172+ ; CHECK-NEXT: store i32 [[TMP3]], i32 addrspace(1)* [[P2_SROA_CAST]], align 4
173+ ; CHECK-NEXT: [[P2_SROA_IDX:%.*]] = getelementptr inbounds [2 x float], [2 x float] addrspace(1)* [[COMPLEX2]], i16 0, i16 1
174+ ; CHECK-NEXT: [[P2_SROA_CAST4:%.*]] = bitcast float addrspace(1)* [[P2_SROA_IDX]] to i32 addrspace(1)*
175+ ; CHECK-NEXT: store i32 [[TMP4]], i32 addrspace(1)* [[P2_SROA_CAST4]], align 4
176+ ; CHECK-NEXT: ret void
177+ ;
128178 %a = alloca i64
129179 %a.cast = bitcast i64* %a to [2 x float ]*
130180 %a.gep1 = getelementptr [2 x float ], [2 x float ]* %a.cast , i32 0 , i32 0
0 commit comments