Skip to content

Commit eac91d5

Browse files
nemanja-ibm authored and tstellar committed
[PowerPC] Prevent legalization loop from promoting SELECT_CC from v4i32 to v4i32
As reported in https://bugs.llvm.org/show_bug.cgi?id=45709, we can hit an infinite loop in legalization because we set the legalization action for ISD::SELECT_CC to Promote for all fixed-length vector types. Without a different legalization action for the type being promoted to, the legalizer simply loops. Since we don't have patterns to match the node, the right legalization action is Expand.

Differential revision: https://reviews.llvm.org/D79854

(cherry picked from commit 793cc51)
1 parent 1abba52 commit eac91d5

File tree

2 files changed

+59
-0
lines changed

2 files changed

+59
-0
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -694,6 +694,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
694694
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
695695
}
696696
}
697+
setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
697698
if (!Subtarget.hasP8Vector()) {
698699
setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
699700
setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
3+
; RUN: -mcpu=pwr6 -ppc-asm-full-reg-names -mattr=-vsx \
4+
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
5+
6+
; There is code in the SDAG to expand FMAX/FMIN with fast flags to SELECT_CC.
7+
; On PPC, we had SELECT_CC legalized using Promote for all vector types
8+
; (including the type that they are all promoted to - which caused an infinite
9+
; loop in legalization). This test just ensures that we terminate on such input.
10+
; Reproducer function for the SELECT_CC legalization loop: it takes a
; <4 x float>, returns void, and exists only so that llc is run over the
; vector maxnum calls below. The branch conditions are undef and the final
; fcmp result is unused; the expected assembly that follows is autogenerated.
define dso_local void @_ZN1a1bEv(<4 x float> %in) local_unnamed_addr #0 align 2 {
11+
; CHECK-LABEL: _ZN1a1bEv:
12+
; CHECK: # %bb.0:
13+
; CHECK-NEXT: bclr 12, 4*cr5+lt, 0
14+
; CHECK-NEXT: # %bb.1: # %.preheader
15+
; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha
16+
; CHECK-NEXT: vxor v3, v3, v3
17+
; CHECK-NEXT: addi r3, r3, .LCPI0_0@toc@l
18+
; CHECK-NEXT: lvx v4, 0, r3
19+
; CHECK-NEXT: addi r3, r1, -48
20+
; CHECK-NEXT: stvx v3, 0, r3
21+
; CHECK-NEXT: addi r3, r1, -32
22+
; CHECK-NEXT: vperm v2, v2, v2, v4
23+
; CHECK-NEXT: stvx v2, 0, r3
24+
; CHECK-NEXT: lwz r3, -48(r1)
25+
; CHECK-NEXT: lwz r4, -32(r1)
26+
; CHECK-NEXT: cmpw r4, r3
27+
; CHECK-NEXT: bc 12, gt, .LBB0_2
28+
; CHECK-NEXT: b .LBB0_3
29+
; CHECK-NEXT: .LBB0_2: # %.preheader
30+
; CHECK-NEXT: addi r3, r4, 0
31+
; CHECK-NEXT: .LBB0_3: # %.preheader
32+
; CHECK-NEXT: stw r3, -64(r1)
33+
; CHECK-NEXT: addi r3, r1, -64
34+
; CHECK-NEXT: lvx v2, 0, r3
35+
; CHECK-NEXT: addi r3, r1, -16
36+
; CHECK-NEXT: stvx v2, 0, r3
37+
; CHECK-NEXT: blr
38+
br i1 undef, label %7, label %1
39+
40+
1: ; preds = %1, %0
41+
br i1 undef, label %2, label %1
42+
43+
2: ; preds = %1
44+
; Two chained 'fast' maxnum calls on <4 x float>: this is the vector
; FMAX-with-fast-flags input that the test feeds to the legalizer.
%3 = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
45+
%4 = call fast <4 x float> @llvm.maxnum.v4f32(<4 x float> %3, <4 x float> zeroinitializer)
46+
%5 = call fast <4 x float> @llvm.maxnum.v4f32(<4 x float> %4, <4 x float> undef)
47+
%6 = extractelement <4 x float> %5, i32 0
48+
br label %7
49+
50+
7: ; preds = %2, %0
51+
%8 = phi float [ %6, %2 ], [ undef, %0 ]
52+
%9 = fcmp fast une float %8, 0.000000e+00
53+
ret void
54+
}
55+
56+
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #0
57+
58+
attributes #0 = { nounwind }

0 commit comments

Comments
 (0)