@@ -95,3 +95,80 @@ declare i4 @llvm.ctpop.i4(i4) #1
; TBAA type descriptors: !6 ("short") names !7 ("omnipotent char") as its parent,
; and !7 in turn names the root node !8 ("Simple C/C++ TBAA") as its parent.
9595!6 = !{!"short" , !7 , i64 0 }
9696!7 = !{!"omnipotent char" , !8 , i64 0 }
9797!8 = !{!"Simple C/C++ TBAA" }
98+
99+ ; Function to lock down codegen changes for floating-point vector comparisons
; Returns, as an i32, how many of the four floats loaded from %colauths compare
; `une` (unordered or not equal) against 0.0. The range attribute on the return
; value bounds the result to [0, 5).
; The three groups of check lines below (one per check prefix) each pin a complete
; expected instruction sequence for this function, from its label through `blr`.
100+ define range(i32 0 , 5 ) i32 @cols_needed (ptr %colauths ){
101+ ; POWERPC_64LE-LABEL: cols_needed:
102+ ; POWERPC_64LE: # %bb.0: # %entry
103+ ; POWERPC_64LE-NEXT: lxv vs0, 0(r3)
104+ ; POWERPC_64LE-NEXT: xxlxor vs1, vs1, vs1
105+ ; POWERPC_64LE-NEXT: li r4, 4
106+ ; POWERPC_64LE-NEXT: li r3, 0
107+ ; POWERPC_64LE-NEXT: xvcmpeqsp vs0, vs0, vs1
108+ ; POWERPC_64LE-NEXT: xxlnor v2, vs0, vs0
109+ ; POWERPC_64LE-NEXT: vextuwrx r4, r4, v2
110+ ; POWERPC_64LE-NEXT: vextuwrx r3, r3, v2
111+ ; POWERPC_64LE-NEXT: rlwinm r4, r4, 1, 30, 30
112+ ; POWERPC_64LE-NEXT: sub r3, r4, r3
113+ ; POWERPC_64LE-NEXT: mfvsrwz r4, v2
114+ ; POWERPC_64LE-NEXT: rlwinm r4, r4, 2, 29, 29
115+ ; POWERPC_64LE-NEXT: or r3, r3, r4
116+ ; POWERPC_64LE-NEXT: li r4, 12
117+ ; POWERPC_64LE-NEXT: vextuwrx r4, r4, v2
118+ ; POWERPC_64LE-NEXT: slwi r4, r4, 3
119+ ; POWERPC_64LE-NEXT: or r3, r3, r4
120+ ; POWERPC_64LE-NEXT: clrlwi r3, r3, 28
121+ ; POWERPC_64LE-NEXT: stb r3, -1(r1)
122+ ; POWERPC_64LE-NEXT: lbz r3, -1(r1)
123+ ; POWERPC_64LE-NEXT: popcntd r3, r3
124+ ; POWERPC_64LE-NEXT: blr
125+ ;
126+ ; POWERPC_64-LABEL: cols_needed:
127+ ; POWERPC_64: # %bb.0: # %entry
128+ ; POWERPC_64-NEXT: lxv vs0, 0(r3)
129+ ; POWERPC_64-NEXT: xxlxor vs1, vs1, vs1
130+ ; POWERPC_64-NEXT: li r4, 8
131+ ; POWERPC_64-NEXT: xvcmpeqsp vs0, vs0, vs1
132+ ; POWERPC_64-NEXT: xxlnor v2, vs0, vs0
133+ ; POWERPC_64-NEXT: vextuwlx r4, r4, v2
134+ ; POWERPC_64-NEXT: mfvsrwz r3, v2
135+ ; POWERPC_64-NEXT: rlwinm r4, r4, 1, 30, 30
136+ ; POWERPC_64-NEXT: rlwimi r4, r3, 2, 29, 29
137+ ; POWERPC_64-NEXT: li r3, 0
138+ ; POWERPC_64-NEXT: vextuwlx r3, r3, v2
139+ ; POWERPC_64-NEXT: rlwimi r4, r3, 3, 0, 28
140+ ; POWERPC_64-NEXT: li r3, 12
141+ ; POWERPC_64-NEXT: vextuwlx r3, r3, v2
142+ ; POWERPC_64-NEXT: sub r3, r4, r3
143+ ; POWERPC_64-NEXT: clrlwi r3, r3, 28
144+ ; POWERPC_64-NEXT: stb r3, -1(r1)
145+ ; POWERPC_64-NEXT: lbz r3, -1(r1)
146+ ; POWERPC_64-NEXT: popcntd r3, r3
147+ ; POWERPC_64-NEXT: blr
148+ ;
149+ ; POWERPC_32-LABEL: cols_needed:
150+ ; POWERPC_32: # %bb.0: # %entry
151+ ; POWERPC_32-NEXT: lxv vs0, 0(r3)
152+ ; POWERPC_32-NEXT: xxlxor vs1, vs1, vs1
153+ ; POWERPC_32-NEXT: xvcmpeqsp vs0, vs0, vs1
154+ ; POWERPC_32-NEXT: xxlnor vs0, vs0, vs0
155+ ; POWERPC_32-NEXT: stxv vs0, -32(r1)
156+ ; POWERPC_32-NEXT: lwz r3, -24(r1)
157+ ; POWERPC_32-NEXT: lwz r4, -28(r1)
158+ ; POWERPC_32-NEXT: rlwinm r3, r3, 1, 30, 30
159+ ; POWERPC_32-NEXT: rlwimi r3, r4, 2, 29, 29
160+ ; POWERPC_32-NEXT: lwz r4, -32(r1)
161+ ; POWERPC_32-NEXT: rlwimi r3, r4, 3, 0, 28
162+ ; POWERPC_32-NEXT: lwz r4, -20(r1)
163+ ; POWERPC_32-NEXT: sub r3, r3, r4
164+ ; POWERPC_32-NEXT: clrlwi r3, r3, 28
165+ ; POWERPC_32-NEXT: popcntw r3, r3
166+ ; POWERPC_32-NEXT: blr
167+ entry:
; Load the <4 x float> vector from %colauths (declared 4-byte aligned).
168+ %0 = load <4 x float >, ptr %colauths , align 4 , !tbaa !5
; Lane-wise compare against zero: a lane is true when it is unordered (NaN) or not equal to 0.0.
169+ %1 = fcmp une <4 x float > %0 , zeroinitializer
; Reinterpret the four i1 lane results as a single 4-bit integer mask.
170+ %2 = bitcast <4 x i1 > %1 to i4
; Count the set bits in the mask; range(i4 0, 5) records that the count lies between 0 and 4.
171+ %3 = tail call range(i4 0 , 5 ) i4 @llvm.ctpop.i4 (i4 %2 )
; Zero-extend the count to i32; the nneg flag asserts the i4 operand is non-negative.
172+ %4 = zext nneg i4 %3 to i32
173+ ret i32 %4
174+ }
0 commit comments