Skip to content

Commit 10de911

Browse files
committed
[ARM][SLP] Fix incorrect cost function for SLP Vectorization of ZExt/SExt
PR #117350 made changes to the SLP vectorizer that introduced a regression on ARM vectorization benchmarks. This was due to the changes assuming that SExt/ZExt vector instructions have a constant cost. That behaviour is expected on RISC-V, but not on ARM, where the source and destination types of SExt/ZExt instructions are taken into account when calculating vector cost.

Change-Id: I6f995dcde26e5aaf62b779b63e52988fb333f941
1 parent 05bd7d2 commit 10de911

File tree

2 files changed

+367
-5
lines changed

2 files changed

+367
-5
lines changed

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1791,11 +1791,19 @@ InstructionCost ARMTTIImpl::getExtendedReductionCost(
17911791

17921792
int ISD = TLI->InstructionOpcodeToISD(Opcode);
17931793

1794+
bool IsArithmeticExtendedReduction = is_contained(
1795+
{ISD::ADD, ISD::FADD, ISD::MUL, ISD::FMUL, ISD::AND, ISD::OR, ISD::XOR},
1796+
ISD);
1797+
InstructionCost CastCost =
1798+
(IsArithmeticExtendedReduction)
1799+
? getCastInstrCost(Opcode, ResTy, ValTy, TTI::CastContextHint::None,
1800+
CostKind)
1801+
: 0;
1802+
17941803
switch (ISD) {
17951804
case ISD::ADD:
17961805
if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
17971806
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
1798-
17991807
// The legal cases are:
18001808
// VADDV u/s 8/16/32
18011809
// VADDLV u/s 32
@@ -1807,14 +1815,14 @@ InstructionCost ARMTTIImpl::getExtendedReductionCost(
18071815
((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
18081816
(LT.second == MVT::v8i16 && RevVTSize <= 32) ||
18091817
(LT.second == MVT::v4i32 && RevVTSize <= 64)))
1810-
return ST->getMVEVectorCostFactor(CostKind) * LT.first;
1818+
return CastCost + ST->getMVEVectorCostFactor(CostKind) * LT.first;
18111819
}
1812-
break;
1820+
18131821
default:
18141822
break;
18151823
}
1816-
return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy, FMF,
1817-
CostKind);
1824+
return CastCost + BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy,
1825+
ValTy, FMF, CostKind);
18181826
}
18191827

18201828
InstructionCost
Lines changed: 354 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,354 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=arm-none-eabi --mattr=+mve | FileCheck %s
3+
4+
5+
; add-reduction of four i32 loads sign-extended to i64. The CHECK lines
; expect the scalar code to be left unvectorized by the SLP vectorizer.
define dso_local i64 @vadd(ptr noundef %0) #0 {
; CHECK-LABEL: define dso_local i64 @vadd(
; CHECK-SAME: ptr noundef [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[TMP2:%.*]] = alloca ptr, align 4
; CHECK-NEXT:    store ptr [[TMP0]], ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1
; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = add nsw i64 [[TMP6]], [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 2
; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
; CHECK-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = add nsw i64 [[TMP11]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 3
; CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
; CHECK-NEXT:    [[TMP20:%.*]] = sext i32 [[TMP19]] to i64
; CHECK-NEXT:    [[TMP21:%.*]] = add nsw i64 [[TMP16]], [[TMP20]]
; CHECK-NEXT:    ret i64 [[TMP21]]
;
  %2 = alloca ptr, align 4
  store ptr %0, ptr %2, align 4
  %3 = load ptr, ptr %2, align 4
  %4 = getelementptr inbounds i32, ptr %3, i32 0
  %5 = load i32, ptr %4, align 4
  %6 = sext i32 %5 to i64
  %7 = load ptr, ptr %2, align 4
  %8 = getelementptr inbounds i32, ptr %7, i32 1
  %9 = load i32, ptr %8, align 4
  %10 = sext i32 %9 to i64
  %11 = add nsw i64 %6, %10
  %12 = load ptr, ptr %2, align 4
  %13 = getelementptr inbounds i32, ptr %12, i32 2
  %14 = load i32, ptr %13, align 4
  %15 = sext i32 %14 to i64
  %16 = add nsw i64 %11, %15
  %17 = load ptr, ptr %2, align 4
  %18 = getelementptr inbounds i32, ptr %17, i32 3
  %19 = load i32, ptr %18, align 4
  %20 = sext i32 %19 to i64
  %21 = add nsw i64 %16, %20
  ret i64 %21
}
54+
55+
; mul-reduction of four i32 loads sign-extended to i64. The CHECK lines
; expect the scalar code to be left unvectorized by the SLP vectorizer.
define dso_local i64 @vmul(ptr noundef %0) #0 {
; CHECK-LABEL: define dso_local i64 @vmul(
; CHECK-SAME: ptr noundef [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP2:%.*]] = alloca ptr, align 4
; CHECK-NEXT:    store ptr [[TMP0]], ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1
; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = mul nsw i64 [[TMP6]], [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 2
; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
; CHECK-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = mul nsw i64 [[TMP11]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 3
; CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
; CHECK-NEXT:    [[TMP20:%.*]] = sext i32 [[TMP19]] to i64
; CHECK-NEXT:    [[TMP21:%.*]] = mul nsw i64 [[TMP16]], [[TMP20]]
; CHECK-NEXT:    ret i64 [[TMP21]]
;
  %2 = alloca ptr, align 4
  store ptr %0, ptr %2, align 4
  %3 = load ptr, ptr %2, align 4
  %4 = getelementptr inbounds i32, ptr %3, i32 0
  %5 = load i32, ptr %4, align 4
  %6 = sext i32 %5 to i64
  %7 = load ptr, ptr %2, align 4
  %8 = getelementptr inbounds i32, ptr %7, i32 1
  %9 = load i32, ptr %8, align 4
  %10 = sext i32 %9 to i64
  %11 = mul nsw i64 %6, %10
  %12 = load ptr, ptr %2, align 4
  %13 = getelementptr inbounds i32, ptr %12, i32 2
  %14 = load i32, ptr %13, align 4
  %15 = sext i32 %14 to i64
  %16 = mul nsw i64 %11, %15
  %17 = load ptr, ptr %2, align 4
  %18 = getelementptr inbounds i32, ptr %17, i32 3
  %19 = load i32, ptr %18, align 4
  %20 = sext i32 %19 to i64
  %21 = mul nsw i64 %16, %20
  ret i64 %21
}
104+
105+
; and-reduction of four i32 loads sign-extended to i64. The CHECK lines
; expect the scalar code to be left unvectorized by the SLP vectorizer.
define dso_local i64 @vand(ptr noundef %0) #0 {
; CHECK-LABEL: define dso_local i64 @vand(
; CHECK-SAME: ptr noundef [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP2:%.*]] = alloca ptr, align 4
; CHECK-NEXT:    store ptr [[TMP0]], ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1
; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = and i64 [[TMP6]], [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 2
; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
; CHECK-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = and i64 [[TMP11]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 3
; CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
; CHECK-NEXT:    [[TMP20:%.*]] = sext i32 [[TMP19]] to i64
; CHECK-NEXT:    [[TMP21:%.*]] = and i64 [[TMP16]], [[TMP20]]
; CHECK-NEXT:    ret i64 [[TMP21]]
;
  %2 = alloca ptr, align 4
  store ptr %0, ptr %2, align 4
  %3 = load ptr, ptr %2, align 4
  %4 = getelementptr inbounds i32, ptr %3, i32 0
  %5 = load i32, ptr %4, align 4
  %6 = sext i32 %5 to i64
  %7 = load ptr, ptr %2, align 4
  %8 = getelementptr inbounds i32, ptr %7, i32 1
  %9 = load i32, ptr %8, align 4
  %10 = sext i32 %9 to i64
  %11 = and i64 %6, %10
  %12 = load ptr, ptr %2, align 4
  %13 = getelementptr inbounds i32, ptr %12, i32 2
  %14 = load i32, ptr %13, align 4
  %15 = sext i32 %14 to i64
  %16 = and i64 %11, %15
  %17 = load ptr, ptr %2, align 4
  %18 = getelementptr inbounds i32, ptr %17, i32 3
  %19 = load i32, ptr %18, align 4
  %20 = sext i32 %19 to i64
  %21 = and i64 %16, %20
  ret i64 %21
}
154+
155+
; or-reduction of four i32 loads sign-extended to i64. The CHECK lines
; expect the scalar code to be left unvectorized by the SLP vectorizer.
define dso_local i64 @vor(ptr noundef %0) #0 {
; CHECK-LABEL: define dso_local i64 @vor(
; CHECK-SAME: ptr noundef [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP2:%.*]] = alloca ptr, align 4
; CHECK-NEXT:    store ptr [[TMP0]], ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1
; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP6]], [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 2
; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
; CHECK-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = or i64 [[TMP11]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 3
; CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
; CHECK-NEXT:    [[TMP20:%.*]] = sext i32 [[TMP19]] to i64
; CHECK-NEXT:    [[TMP21:%.*]] = or i64 [[TMP16]], [[TMP20]]
; CHECK-NEXT:    ret i64 [[TMP21]]
;
  %2 = alloca ptr, align 4
  store ptr %0, ptr %2, align 4
  %3 = load ptr, ptr %2, align 4
  %4 = getelementptr inbounds i32, ptr %3, i32 0
  %5 = load i32, ptr %4, align 4
  %6 = sext i32 %5 to i64
  %7 = load ptr, ptr %2, align 4
  %8 = getelementptr inbounds i32, ptr %7, i32 1
  %9 = load i32, ptr %8, align 4
  %10 = sext i32 %9 to i64
  %11 = or i64 %6, %10
  %12 = load ptr, ptr %2, align 4
  %13 = getelementptr inbounds i32, ptr %12, i32 2
  %14 = load i32, ptr %13, align 4
  %15 = sext i32 %14 to i64
  %16 = or i64 %11, %15
  %17 = load ptr, ptr %2, align 4
  %18 = getelementptr inbounds i32, ptr %17, i32 3
  %19 = load i32, ptr %18, align 4
  %20 = sext i32 %19 to i64
  %21 = or i64 %16, %20
  ret i64 %21
}
204+
205+
; xor-reduction of four i32 loads sign-extended to i64. The CHECK lines
; expect the scalar code to be left unvectorized by the SLP vectorizer.
define dso_local i64 @vxor(ptr noundef %0) #0 {
; CHECK-LABEL: define dso_local i64 @vxor(
; CHECK-SAME: ptr noundef [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP2:%.*]] = alloca ptr, align 4
; CHECK-NEXT:    store ptr [[TMP0]], ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1
; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP6]], [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 2
; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
; CHECK-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = xor i64 [[TMP11]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 3
; CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
; CHECK-NEXT:    [[TMP20:%.*]] = sext i32 [[TMP19]] to i64
; CHECK-NEXT:    [[TMP21:%.*]] = xor i64 [[TMP16]], [[TMP20]]
; CHECK-NEXT:    ret i64 [[TMP21]]
;
  %2 = alloca ptr, align 4
  store ptr %0, ptr %2, align 4
  %3 = load ptr, ptr %2, align 4
  %4 = getelementptr inbounds i32, ptr %3, i32 0
  %5 = load i32, ptr %4, align 4
  %6 = sext i32 %5 to i64
  %7 = load ptr, ptr %2, align 4
  %8 = getelementptr inbounds i32, ptr %7, i32 1
  %9 = load i32, ptr %8, align 4
  %10 = sext i32 %9 to i64
  %11 = xor i64 %6, %10
  %12 = load ptr, ptr %2, align 4
  %13 = getelementptr inbounds i32, ptr %12, i32 2
  %14 = load i32, ptr %13, align 4
  %15 = sext i32 %14 to i64
  %16 = xor i64 %11, %15
  %17 = load ptr, ptr %2, align 4
  %18 = getelementptr inbounds i32, ptr %17, i32 3
  %19 = load i32, ptr %18, align 4
  %20 = sext i32 %19 to i64
  %21 = xor i64 %16, %20
  ret i64 %21
}
254+
255+
; fadd-reduction of four float loads extended to double. The CHECK lines
; expect the scalar code to be left unvectorized by the SLP vectorizer.
define dso_local double @vfadd(ptr noundef %0) #0 {
; CHECK-LABEL: define dso_local double @vfadd(
; CHECK-SAME: ptr noundef [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP2:%.*]] = alloca ptr, align 4
; CHECK-NEXT:    store ptr [[TMP0]], ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[TMP4]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = fpext float [[TMP5]] to double
; CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 1
; CHECK-NEXT:    [[TMP9:%.*]] = load float, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[TMP10:%.*]] = fpext float [[TMP9]] to double
; CHECK-NEXT:    [[TMP11:%.*]] = fadd double [[TMP6]], [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 2
; CHECK-NEXT:    [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4
; CHECK-NEXT:    [[TMP15:%.*]] = fpext float [[TMP14]] to double
; CHECK-NEXT:    [[TMP16:%.*]] = fadd double [[TMP11]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 3
; CHECK-NEXT:    [[TMP19:%.*]] = load float, ptr [[TMP18]], align 4
; CHECK-NEXT:    [[TMP20:%.*]] = fpext float [[TMP19]] to double
; CHECK-NEXT:    [[TMP21:%.*]] = fadd double [[TMP16]], [[TMP20]]
; CHECK-NEXT:    ret double [[TMP21]]
;
  %2 = alloca ptr, align 4
  store ptr %0, ptr %2, align 4
  %3 = load ptr, ptr %2, align 4
  %4 = getelementptr inbounds float, ptr %3, i32 0
  %5 = load float, ptr %4, align 4
  %6 = fpext float %5 to double
  %7 = load ptr, ptr %2, align 4
  %8 = getelementptr inbounds float, ptr %7, i32 1
  %9 = load float, ptr %8, align 4
  %10 = fpext float %9 to double
  %11 = fadd double %6, %10
  %12 = load ptr, ptr %2, align 4
  %13 = getelementptr inbounds float, ptr %12, i32 2
  %14 = load float, ptr %13, align 4
  %15 = fpext float %14 to double
  %16 = fadd double %11, %15
  %17 = load ptr, ptr %2, align 4
  %18 = getelementptr inbounds float, ptr %17, i32 3
  %19 = load float, ptr %18, align 4
  %20 = fpext float %19 to double
  %21 = fadd double %16, %20
  ret double %21
}
304+
305+
; fmul-reduction of four float loads extended to double. The CHECK lines
; expect the scalar code to be left unvectorized by the SLP vectorizer.
define dso_local double @vfmul(ptr noundef %0) #0 {
; CHECK-LABEL: define dso_local double @vfmul(
; CHECK-SAME: ptr noundef [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP2:%.*]] = alloca ptr, align 4
; CHECK-NEXT:    store ptr [[TMP0]], ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[TMP4]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = fpext float [[TMP5]] to double
; CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 1
; CHECK-NEXT:    [[TMP9:%.*]] = load float, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[TMP10:%.*]] = fpext float [[TMP9]] to double
; CHECK-NEXT:    [[TMP11:%.*]] = fmul double [[TMP6]], [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 2
; CHECK-NEXT:    [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4
; CHECK-NEXT:    [[TMP15:%.*]] = fpext float [[TMP14]] to double
; CHECK-NEXT:    [[TMP16:%.*]] = fmul double [[TMP11]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 3
; CHECK-NEXT:    [[TMP19:%.*]] = load float, ptr [[TMP18]], align 4
; CHECK-NEXT:    [[TMP20:%.*]] = fpext float [[TMP19]] to double
; CHECK-NEXT:    [[TMP21:%.*]] = fmul double [[TMP16]], [[TMP20]]
; CHECK-NEXT:    ret double [[TMP21]]
;
  %2 = alloca ptr, align 4
  store ptr %0, ptr %2, align 4
  %3 = load ptr, ptr %2, align 4
  %4 = getelementptr inbounds float, ptr %3, i32 0
  %5 = load float, ptr %4, align 4
  %6 = fpext float %5 to double
  %7 = load ptr, ptr %2, align 4
  %8 = getelementptr inbounds float, ptr %7, i32 1
  %9 = load float, ptr %8, align 4
  %10 = fpext float %9 to double
  %11 = fmul double %6, %10
  %12 = load ptr, ptr %2, align 4
  %13 = getelementptr inbounds float, ptr %12, i32 2
  %14 = load float, ptr %13, align 4
  %15 = fpext float %14 to double
  %16 = fmul double %11, %15
  %17 = load ptr, ptr %2, align 4
  %18 = getelementptr inbounds float, ptr %17, i32 3
  %19 = load float, ptr %18, align 4
  %20 = fpext float %19 to double
  %21 = fmul double %16, %20
  ret double %21
}
354+

0 commit comments

Comments
 (0)