Skip to content

Commit 9bcf8f0

Browse files
Himadhith and himadhith authored
[NFC][PowerPC] Lockdown instructions for floating point comparison with zero-vector (#162828)
This NFC patch adds a new test function that locks down the machine instructions emitted for floating-point vector comparisons with a zero vector. Such a case was previously missing because the test file only checked integer vectors. --------- Co-authored-by: himadhith <[email protected]>
1 parent 107ca63 commit 9bcf8f0

File tree

1 file changed

+77
-0
lines changed

1 file changed

+77
-0
lines changed

llvm/test/CodeGen/PowerPC/check-zero-vector.ll renamed to llvm/test/CodeGen/PowerPC/compare-vector-with-zero.ll

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,3 +95,80 @@ declare i4 @llvm.ctpop.i4(i4) #1
9595
!6 = !{!"short", !7, i64 0}
9696
!7 = !{!"omnipotent char", !8, i64 0}
9797
!8 = !{!"Simple C/C++ TBAA"}
98+
99+
; Function to lock down codegen for floating-point vector comparisons with a
; zero vector.
;
; @cols_needed loads a <4 x float> from %colauths, compares every lane against
; zero with `fcmp une` (a lane is true when it is unordered or not equal to
; zero), bitcasts the resulting <4 x i1> mask to an i4, counts the set bits
; with @llvm.ctpop.i4, and returns that count zero-extended to i32. The
; range(i32 0, 5) return attribute matches the possible counts 0 through 4.
;
; The assertion blocks below pin the expected instruction sequence under the
; POWERPC_64LE, POWERPC_64 and POWERPC_32 check prefixes; the RUN lines that
; select those prefixes are not part of this excerpt.
; NOTE(review): the assertions look autogenerated - presumably they should be
; regenerated with the update script rather than edited by hand; confirm
; against the file header.
; Keep explanatory comments free of "<prefix>" immediately followed by a colon
; so that FileCheck does not parse them as directives.
100+
define range(i32 0, 5) i32 @cols_needed(ptr %colauths){
101+
; POWERPC_64LE-LABEL: cols_needed:
102+
; POWERPC_64LE: # %bb.0: # %entry
103+
; POWERPC_64LE-NEXT: lxv vs0, 0(r3)
104+
; POWERPC_64LE-NEXT: xxlxor vs1, vs1, vs1
105+
; POWERPC_64LE-NEXT: li r4, 4
106+
; POWERPC_64LE-NEXT: li r3, 0
107+
; POWERPC_64LE-NEXT: xvcmpeqsp vs0, vs0, vs1
108+
; POWERPC_64LE-NEXT: xxlnor v2, vs0, vs0
109+
; POWERPC_64LE-NEXT: vextuwrx r4, r4, v2
110+
; POWERPC_64LE-NEXT: vextuwrx r3, r3, v2
111+
; POWERPC_64LE-NEXT: rlwinm r4, r4, 1, 30, 30
112+
; POWERPC_64LE-NEXT: sub r3, r4, r3
113+
; POWERPC_64LE-NEXT: mfvsrwz r4, v2
114+
; POWERPC_64LE-NEXT: rlwinm r4, r4, 2, 29, 29
115+
; POWERPC_64LE-NEXT: or r3, r3, r4
116+
; POWERPC_64LE-NEXT: li r4, 12
117+
; POWERPC_64LE-NEXT: vextuwrx r4, r4, v2
118+
; POWERPC_64LE-NEXT: slwi r4, r4, 3
119+
; POWERPC_64LE-NEXT: or r3, r3, r4
120+
; POWERPC_64LE-NEXT: clrlwi r3, r3, 28
121+
; POWERPC_64LE-NEXT: stb r3, -1(r1)
122+
; POWERPC_64LE-NEXT: lbz r3, -1(r1)
123+
; POWERPC_64LE-NEXT: popcntd r3, r3
124+
; POWERPC_64LE-NEXT: blr
125+
;
126+
; POWERPC_64-LABEL: cols_needed:
127+
; POWERPC_64: # %bb.0: # %entry
128+
; POWERPC_64-NEXT: lxv vs0, 0(r3)
129+
; POWERPC_64-NEXT: xxlxor vs1, vs1, vs1
130+
; POWERPC_64-NEXT: li r4, 8
131+
; POWERPC_64-NEXT: xvcmpeqsp vs0, vs0, vs1
132+
; POWERPC_64-NEXT: xxlnor v2, vs0, vs0
133+
; POWERPC_64-NEXT: vextuwlx r4, r4, v2
134+
; POWERPC_64-NEXT: mfvsrwz r3, v2
135+
; POWERPC_64-NEXT: rlwinm r4, r4, 1, 30, 30
136+
; POWERPC_64-NEXT: rlwimi r4, r3, 2, 29, 29
137+
; POWERPC_64-NEXT: li r3, 0
138+
; POWERPC_64-NEXT: vextuwlx r3, r3, v2
139+
; POWERPC_64-NEXT: rlwimi r4, r3, 3, 0, 28
140+
; POWERPC_64-NEXT: li r3, 12
141+
; POWERPC_64-NEXT: vextuwlx r3, r3, v2
142+
; POWERPC_64-NEXT: sub r3, r4, r3
143+
; POWERPC_64-NEXT: clrlwi r3, r3, 28
144+
; POWERPC_64-NEXT: stb r3, -1(r1)
145+
; POWERPC_64-NEXT: lbz r3, -1(r1)
146+
; POWERPC_64-NEXT: popcntd r3, r3
147+
; POWERPC_64-NEXT: blr
148+
;
149+
; POWERPC_32-LABEL: cols_needed:
150+
; POWERPC_32: # %bb.0: # %entry
151+
; POWERPC_32-NEXT: lxv vs0, 0(r3)
152+
; POWERPC_32-NEXT: xxlxor vs1, vs1, vs1
153+
; POWERPC_32-NEXT: xvcmpeqsp vs0, vs0, vs1
154+
; POWERPC_32-NEXT: xxlnor vs0, vs0, vs0
155+
; POWERPC_32-NEXT: stxv vs0, -32(r1)
156+
; POWERPC_32-NEXT: lwz r3, -24(r1)
157+
; POWERPC_32-NEXT: lwz r4, -28(r1)
158+
; POWERPC_32-NEXT: rlwinm r3, r3, 1, 30, 30
159+
; POWERPC_32-NEXT: rlwimi r3, r4, 2, 29, 29
160+
; POWERPC_32-NEXT: lwz r4, -32(r1)
161+
; POWERPC_32-NEXT: rlwimi r3, r4, 3, 0, 28
162+
; POWERPC_32-NEXT: lwz r4, -20(r1)
163+
; POWERPC_32-NEXT: sub r3, r3, r4
164+
; POWERPC_32-NEXT: clrlwi r3, r3, 28
165+
; POWERPC_32-NEXT: popcntw r3, r3
166+
; POWERPC_32-NEXT: blr
167+
entry:
168+
%0 = load <4 x float>, ptr %colauths, align 4, !tbaa !5
169+
%1 = fcmp une <4 x float> %0, zeroinitializer
170+
%2 = bitcast <4 x i1> %1 to i4
171+
%3 = tail call range(i4 0, 5) i4 @llvm.ctpop.i4(i4 %2)
172+
%4 = zext nneg i4 %3 to i32
173+
ret i32 %4
174+
}

0 commit comments

Comments
 (0)