Skip to content

Commit 177a9ac

Browse files
nemanjaitstellar
authored and committed
[PowerPC] Unaligned FP default should apply to scalars only
As reported in PR45186, we could be in a situation where we don't want to handle unaligned memory accesses for FP scalars but still have VSX (which allows unaligned access for vectors). Change the default to only apply to scalars. Fixes: https://bugs.llvm.org/show_bug.cgi?id=45186 (cherry picked from commit 099a875)
1 parent 8f299fd commit 177a9ac

File tree

2 files changed

+134
-1
lines changed

2 files changed

+134
-1
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15279,7 +15279,8 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1527915279
if (!VT.isSimple())
1528015280
return false;
1528115281

15282-
if (VT.isFloatingPoint() && !Subtarget.allowsUnalignedFPAccess())
15282+
if (VT.isFloatingPoint() && !VT.isVector() &&
15283+
!Subtarget.allowsUnalignedFPAccess())
1528315284
return false;
1528415285

1528515286
if (VT.getSimpleVT().isVector()) {
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -ppc-asm-full-reg-names -mtriple=powerpc64-- -mattr=+vsx \
3+
; RUN: -verify-machineinstrs < %s | FileCheck %s
4+
%struct.anon = type { i64, i64 }
5+
6+
@d = local_unnamed_addr global %struct.anon zeroinitializer, align 8
7+
8+
; Function Attrs: norecurse nounwind readonly
9+
; @e assembles an i64 from 8 consecutive i8 loads (align 1) at %f, each byte
; zero-extended and shifted into place from bit 56 down to bit 0 (big-endian
; byte order). The CHECK lines expect the whole pattern to be folded into a
; single unaligned 64-bit scalar load (ldx) — i.e. the scalar-FP unaligned
; restriction must not block merging plain integer loads.
define i64 @e(i8* nocapture readonly %f) local_unnamed_addr #0 {
10+
; CHECK-LABEL: e:
11+
; CHECK: # %bb.0: # %entry
12+
; CHECK-NEXT: ldx r3, 0, r3
13+
; CHECK-NEXT: blr
14+
entry:
15+
%0 = load i8, i8* %f, align 1
16+
%conv = zext i8 %0 to i64
17+
%shl = shl nuw i64 %conv, 56
18+
%arrayidx1 = getelementptr inbounds i8, i8* %f, i64 1
19+
%1 = load i8, i8* %arrayidx1, align 1
20+
%conv2 = zext i8 %1 to i64
21+
%shl3 = shl nuw nsw i64 %conv2, 48
22+
%or = or i64 %shl3, %shl
23+
%arrayidx4 = getelementptr inbounds i8, i8* %f, i64 2
24+
%2 = load i8, i8* %arrayidx4, align 1
25+
%conv5 = zext i8 %2 to i64
26+
%shl6 = shl nuw nsw i64 %conv5, 40
27+
%or7 = or i64 %or, %shl6
28+
%arrayidx8 = getelementptr inbounds i8, i8* %f, i64 3
29+
%3 = load i8, i8* %arrayidx8, align 1
30+
%conv9 = zext i8 %3 to i64
31+
%shl10 = shl nuw nsw i64 %conv9, 32
32+
%or11 = or i64 %or7, %shl10
33+
%arrayidx12 = getelementptr inbounds i8, i8* %f, i64 4
34+
%4 = load i8, i8* %arrayidx12, align 1
35+
%conv13 = zext i8 %4 to i64
36+
%shl14 = shl nuw nsw i64 %conv13, 24
37+
%or15 = or i64 %or11, %shl14
38+
%arrayidx16 = getelementptr inbounds i8, i8* %f, i64 5
39+
%5 = load i8, i8* %arrayidx16, align 1
40+
%conv17 = zext i8 %5 to i64
41+
%shl18 = shl nuw nsw i64 %conv17, 16
42+
%or20 = or i64 %or15, %shl18
43+
%arrayidx21 = getelementptr inbounds i8, i8* %f, i64 6
44+
%6 = load i8, i8* %arrayidx21, align 1
45+
%conv22 = zext i8 %6 to i64
46+
%shl23 = shl nuw nsw i64 %conv22, 8
47+
%or25 = or i64 %or20, %shl23
48+
; Final byte occupies bits 7:0 with no shift.
%arrayidx26 = getelementptr inbounds i8, i8* %f, i64 7
49+
%7 = load i8, i8* %arrayidx26, align 1
50+
%conv27 = zext i8 %7 to i64
51+
%or28 = or i64 %or25, %conv27
52+
ret i64 %or28
53+
}
54+
55+
; Function Attrs: nofree norecurse nounwind
56+
; @g builds two i64 values the same byte-at-a-time way as @e, reading 16
; unaligned bytes starting at @g's own address (bytes 8..15 then 0..7), and
; stores them into the two i64 fields of @d (field 1 first, then field 0).
; The CHECK lines expect the 16 loads + 2 stores to be merged into one
; unaligned VSX vector load/store pair (lxvd2x/stxvd2x) — unaligned *vector*
; access must remain allowed even when unaligned scalar FP access is not
; (presumably the PR45186 scenario this test was added for — see commit msg).
define void @g() local_unnamed_addr #0 {
57+
; CHECK-LABEL: g:
58+
; CHECK: # %bb.0: # %entry
59+
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
60+
; CHECK-NEXT: addis r4, r2, .LC1@toc@ha
61+
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
62+
; CHECK-NEXT: ld r4, .LC1@toc@l(r4)
63+
; CHECK-NEXT: lxvd2x vs0, 0, r3
64+
; CHECK-NEXT: stxvd2x vs0, 0, r4
65+
; CHECK-NEXT: blr
66+
entry:
67+
; First i64: bytes 8..15 of @g, assembled big-endian (shifts 56 down to 0).
%0 = load i8, i8* getelementptr inbounds (i8, i8* bitcast (void ()* @g to i8*), i64 8), align 1
68+
%conv.i = zext i8 %0 to i64
69+
%shl.i = shl nuw i64 %conv.i, 56
70+
%1 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 9), align 1
71+
%conv2.i = zext i8 %1 to i64
72+
%shl3.i = shl nuw nsw i64 %conv2.i, 48
73+
%or.i = or i64 %shl3.i, %shl.i
74+
%2 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 10), align 1
75+
%conv5.i = zext i8 %2 to i64
76+
%shl6.i = shl nuw nsw i64 %conv5.i, 40
77+
%or7.i = or i64 %or.i, %shl6.i
78+
%3 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 11), align 1
79+
%conv9.i = zext i8 %3 to i64
80+
%shl10.i = shl nuw nsw i64 %conv9.i, 32
81+
%or11.i = or i64 %or7.i, %shl10.i
82+
%4 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 12), align 1
83+
%conv13.i = zext i8 %4 to i64
84+
%shl14.i = shl nuw nsw i64 %conv13.i, 24
85+
%or15.i = or i64 %or11.i, %shl14.i
86+
%5 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 13), align 1
87+
%conv17.i = zext i8 %5 to i64
88+
%shl18.i = shl nuw nsw i64 %conv17.i, 16
89+
%or20.i = or i64 %or15.i, %shl18.i
90+
%6 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 14), align 1
91+
%conv22.i = zext i8 %6 to i64
92+
%shl23.i = shl nuw nsw i64 %conv22.i, 8
93+
%or25.i = or i64 %or20.i, %shl23.i
94+
%7 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 15), align 1
95+
%conv27.i = zext i8 %7 to i64
96+
%or28.i = or i64 %or25.i, %conv27.i
97+
; Store first assembled value into the second i64 field of @d.
store i64 %or28.i, i64* getelementptr inbounds (%struct.anon, %struct.anon* @d, i64 0, i32 1), align 8
98+
; Second i64: bytes 0..7 of @g, same big-endian assembly.
%8 = load i8, i8* bitcast (void ()* @g to i8*), align 1
99+
%conv.i2 = zext i8 %8 to i64
100+
%shl.i3 = shl nuw i64 %conv.i2, 56
101+
%9 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 1), align 1
102+
%conv2.i4 = zext i8 %9 to i64
103+
%shl3.i5 = shl nuw nsw i64 %conv2.i4, 48
104+
%or.i6 = or i64 %shl3.i5, %shl.i3
105+
%10 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 2), align 1
106+
%conv5.i7 = zext i8 %10 to i64
107+
%shl6.i8 = shl nuw nsw i64 %conv5.i7, 40
108+
%or7.i9 = or i64 %or.i6, %shl6.i8
109+
%11 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 3), align 1
110+
%conv9.i10 = zext i8 %11 to i64
111+
%shl10.i11 = shl nuw nsw i64 %conv9.i10, 32
112+
%or11.i12 = or i64 %or7.i9, %shl10.i11
113+
%12 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 4), align 1
114+
%conv13.i13 = zext i8 %12 to i64
115+
%shl14.i14 = shl nuw nsw i64 %conv13.i13, 24
116+
%or15.i15 = or i64 %or11.i12, %shl14.i14
117+
%13 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 5), align 1
118+
%conv17.i16 = zext i8 %13 to i64
119+
%shl18.i17 = shl nuw nsw i64 %conv17.i16, 16
120+
%or20.i18 = or i64 %or15.i15, %shl18.i17
121+
%14 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 6), align 1
122+
%conv22.i19 = zext i8 %14 to i64
123+
%shl23.i20 = shl nuw nsw i64 %conv22.i19, 8
124+
%or25.i21 = or i64 %or20.i18, %shl23.i20
125+
%15 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 7), align 1
126+
%conv27.i22 = zext i8 %15 to i64
127+
%or28.i23 = or i64 %or25.i21, %conv27.i22
128+
; Store second assembled value into the first i64 field of @d.
store i64 %or28.i23, i64* getelementptr inbounds (%struct.anon, %struct.anon* @d, i64 0, i32 0), align 8
129+
ret void
130+
}
131+
132+
attributes #0 = { nounwind }

0 commit comments

Comments (0)