Commit 49330db

[SelectionDAG] Deal with POISON for INSERT_VECTOR_ELT/INSERT_SUBVECTOR (part 2)
Add support in isGuaranteedNotToBeUndefOrPoison and SimplifyDemandedVectorElts to compensate for and avoid regressions seen after a previous commit fixing #141034.
1 parent b3a6687 commit 49330db

11 files changed: +310 −281 lines

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 54 additions & 0 deletions
@@ -5454,6 +5454,60 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
     }
     return true;

+  case ISD::INSERT_SUBVECTOR: {
+    if (Op.getValueType().isScalableVector())
+      break;
+    SDValue Src = Op.getOperand(0);
+    SDValue Sub = Op.getOperand(1);
+    uint64_t Idx = Op.getConstantOperandVal(2);
+    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+    APInt DemandedSrcElts = DemandedElts;
+    DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
+
+    if (!!DemandedSubElts &&
+        !isGuaranteedNotToBeUndefOrPoison(Sub, DemandedSubElts,
+                                          PoisonOnly, Depth + 1))
+      return false;
+    if (!!DemandedSrcElts &&
+        !isGuaranteedNotToBeUndefOrPoison(Src, DemandedSrcElts,
+                                          PoisonOnly, Depth + 1))
+      return false;
+    return true;
+  }
+
+  case ISD::INSERT_VECTOR_ELT: {
+    SDValue InVec = Op.getOperand(0);
+    SDValue InVal = Op.getOperand(1);
+    SDValue EltNo = Op.getOperand(2);
+    EVT VT = InVec.getValueType();
+    auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
+    if (IndexC && VT.isFixedLengthVector() &&
+        IndexC->getZExtValue() < VT.getVectorNumElements()) {
+      if (DemandedElts[IndexC->getZExtValue()] &&
+          !isGuaranteedNotToBeUndefOrPoison(InVal, PoisonOnly, Depth + 1))
+        return false;
+      APInt InVecDemandedElts = DemandedElts;
+      InVecDemandedElts.clearBit(IndexC->getZExtValue());
+      if (!!InVecDemandedElts &&
+          !isGuaranteedNotToBeUndefOrPoison(InVec, InVecDemandedElts,
+                                            PoisonOnly, Depth + 1))
+        return false;
+      return true;
+    }
+    break;
+  }
+
+  case ISD::SCALAR_TO_VECTOR:
+    // If only demanding upper (undef) elements.
+    if (DemandedElts.ugt(1))
+      return PoisonOnly;
+    // If only demanding element 0, or only considering poison.
+    if (PoisonOnly || DemandedElts == 0)
+      return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), PoisonOnly,
+                                              Depth + 1);
+    return false;
+
   case ISD::SPLAT_VECTOR:
     return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), PoisonOnly,
                                             Depth + 1);
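The core of the SelectionDAG.cpp change is how the demanded-elements mask is split: for INSERT_SUBVECTOR, the lanes covered by the inserted subvector (Sub) and the remaining lanes taken from the base vector (Src) are checked separately, and an operand only needs to be proven free of undef/poison when its portion of the mask is non-empty. Below is a minimal sketch of that mask split using plain 64-bit masks instead of llvm::APInt; the element count, insertion index, and demanded mask are made-up example values, not taken from the patch.

// Sketch only: models the DemandedElts split done by the new
// ISD::INSERT_SUBVECTOR case, with uint64_t standing in for llvm::APInt.
#include <cstdint>
#include <cstdio>

int main() {
  const unsigned NumSubElts = 4;      // Sub is a 4-element vector
  const unsigned Idx = 2;             // inserted at element 2 of an 8-element Src
  uint64_t DemandedElts = 0b10110100; // result elements 2, 4, 5 and 7 are used

  // Equivalent of DemandedElts.extractBits(NumSubElts, Idx).
  uint64_t DemandedSubElts = (DemandedElts >> Idx) & ((1ull << NumSubElts) - 1);
  // Equivalent of DemandedSrcElts.clearBits(Idx, Idx + NumSubElts).
  uint64_t DemandedSrcElts = DemandedElts & ~(((1ull << NumSubElts) - 1) << Idx);

  // Only operands with a non-empty mask must be proven non-undef/non-poison.
  // If DemandedSubElts were zero, the insert could be bypassed entirely, which
  // is exactly the new early-out added in TargetLowering.cpp below.
  std::printf("Sub mask: %#llx, Src mask: %#llx\n",
              (unsigned long long)DemandedSubElts,
              (unsigned long long)DemandedSrcElts);
  return 0;
}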

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 4 additions & 0 deletions
@@ -3359,6 +3359,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(
     APInt DemandedSrcElts = DemandedElts;
     DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);

+    // If none of the sub operand elements are demanded, bypass the insert.
+    if (!DemandedSubElts)
+      return TLO.CombineTo(Op, Src);
+
     APInt SubUndef, SubZero;
     if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
                                    Depth + 1))
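This early-out folds INSERT_SUBVECTOR(Src, Sub, Idx) to plain Src whenever no demanded lane falls inside the inserted subvector, since the node then contributes nothing that its users read. A hedged sketch of the decision follows; the helper name and the plain-integer mask are illustrative only and are not LLVM API (the real code performs the check inline on llvm::APInt).

// Sketch only: the condition behind the new bypass, modelled with a plain mask.
#include <cstdint>

// Hypothetical helper: true if INSERT_SUBVECTOR can be replaced by Src because
// none of the lanes covered by Sub are demanded by the node's users.
bool canBypassInsertSubvector(uint64_t DemandedElts, unsigned Idx,
                              unsigned NumSubElts) {
  uint64_t SubLanes = ((1ull << NumSubElts) - 1) << Idx;
  return (DemandedElts & SubLanes) == 0;
}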

llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll

Lines changed: 1 addition & 2 deletions
@@ -229,10 +229,9 @@ define void @insert_vec_v3i16_uaddlv_from_v8i16(ptr %0) {
 ; CHECK: ; %bb.0: ; %entry
 ; CHECK-NEXT: movi.2d v0, #0000000000000000
 ; CHECK-NEXT: movi.2d v1, #0000000000000000
+; CHECK-NEXT: add x8, x0, #8
 ; CHECK-NEXT: uaddlv.8h s0, v0
 ; CHECK-NEXT: mov.h v1[0], v0[0]
-; CHECK-NEXT: mov.h v1[3], w8
-; CHECK-NEXT: add x8, x0, #8
 ; CHECK-NEXT: ushll.4s v1, v1, #0
 ; CHECK-NEXT: ucvtf.4s v1, v1
 ; CHECK-NEXT: st1.s { v1 }[2], [x8]

llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll

Lines changed: 22 additions & 23 deletions
@@ -66,39 +66,38 @@ define amdgpu_vs void @test_3(i32 inreg %arg1, i32 inreg %arg2, ptr addrspace(8)
 ; CHECK-NEXT: s_mov_b32 s6, s4
 ; CHECK-NEXT: s_mov_b32 s5, s3
 ; CHECK-NEXT: s_mov_b32 s4, s2
-; CHECK-NEXT: v_add_i32_e32 v0, vcc, 20, v1
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, 16, v1
-; CHECK-NEXT: v_add_i32_e32 v4, vcc, 12, v1
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, 8, v1
-; CHECK-NEXT: v_add_i32_e32 v9, vcc, 4, v1
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, 12, v1
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, 8, v1
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, 4, v1
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, 20, v1
+; CHECK-NEXT: v_add_i32_e32 v9, vcc, 16, v1
 ; CHECK-NEXT: v_mov_b32_e32 v10, s0
-; CHECK-NEXT: v_add_i32_e32 v11, vcc, 20, v2
-; CHECK-NEXT: v_add_i32_e32 v12, vcc, 16, v2
+; CHECK-NEXT: v_add_i32_e32 v11, vcc, 12, v2
+; CHECK-NEXT: v_add_i32_e32 v12, vcc, 8, v2
 ; CHECK-NEXT: s_mov_b32 m0, -1
-; CHECK-NEXT: ds_read_b32 v8, v0
-; CHECK-NEXT: ds_read_b32 v7, v3
-; CHECK-NEXT: ds_read_b32 v6, v4
-; CHECK-NEXT: ds_read_b32 v5, v5
-; CHECK-NEXT: ds_read_b32 v4, v9
+; CHECK-NEXT: ds_read_b32 v6, v0
+; CHECK-NEXT: ds_read_b32 v5, v3
+; CHECK-NEXT: ds_read_b32 v4, v4
+; CHECK-NEXT: ds_read_b32 v8, v7
+; CHECK-NEXT: ds_read_b32 v7, v9
 ; CHECK-NEXT: ds_read_b32 v3, v1
-; CHECK-NEXT: v_add_i32_e32 v1, vcc, 12, v2
-; CHECK-NEXT: v_add_i32_e32 v9, vcc, 8, v2
-; CHECK-NEXT: v_add_i32_e32 v13, vcc, 4, v2
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, 4, v2
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, 20, v2
+; CHECK-NEXT: v_add_i32_e32 v9, vcc, 16, v2
 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT: tbuffer_store_format_xyzw v[3:6], v10, s[4:7], s1 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:264 glc slc
 ; CHECK-NEXT: tbuffer_store_format_xy v[7:8], v10, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:280 glc slc
-; CHECK-NEXT: ds_read_b32 v0, v12
 ; CHECK-NEXT: s_waitcnt expcnt(1)
-; CHECK-NEXT: ds_read_b32 v5, v1
-; CHECK-NEXT: ds_read_b32 v4, v9
-; CHECK-NEXT: ds_read_b32 v3, v13
+; CHECK-NEXT: ds_read_b32 v5, v11
+; CHECK-NEXT: ds_read_b32 v4, v12
+; CHECK-NEXT: ds_read_b32 v3, v0
+; CHECK-NEXT: ds_read_b32 v1, v1
+; CHECK-NEXT: ds_read_b32 v0, v9
 ; CHECK-NEXT: ds_read_b32 v2, v2
-; CHECK-NEXT: ds_read_b32 v1, v11
-; CHECK-NEXT: s_waitcnt lgkmcnt(5)
-; CHECK-NEXT: exp mrt0 off, off, off, off
 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
-; CHECK-NEXT: tbuffer_store_format_xyzw v[2:5], v10, s[4:7], s1 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:240 glc slc
+; CHECK-NEXT: exp mrt0 off, off, off, off
 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: tbuffer_store_format_xyzw v[2:5], v10, s[4:7], s1 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:240 glc slc
 ; CHECK-NEXT: tbuffer_store_format_xy v[0:1], v10, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:256 glc slc
 ; CHECK-NEXT: s_endpgm
 %load1 = load <6 x float>, ptr addrspace(3) %arg5, align 4

llvm/test/CodeGen/Thumb2/active_lane_mask.ll

Lines changed: 1 addition & 2 deletions
@@ -306,10 +306,9 @@ define void @test_width2(ptr nocapture readnone %x, ptr nocapture %y, i8 zeroext
 ; CHECK-NEXT: ldrne.w r3, [r12]
 ; CHECK-NEXT: vmovne.32 q0[0], r3
 ; CHECK-NEXT: lsls r0, r0, #30
-; CHECK-NEXT: ittt mi
+; CHECK-NEXT: itt mi
 ; CHECK-NEXT: ldrmi.w r0, [r12, #4]
 ; CHECK-NEXT: vmovmi.32 q0[2], r0
-; CHECK-NEXT: vmovmi.32 q0[3], r0
 ; CHECK-NEXT: vmrs r3, p0
 ; CHECK-NEXT: and r0, r3, #1
 ; CHECK-NEXT: ubfx r3, r3, #8, #1

llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll

Lines changed: 16 additions & 20 deletions
@@ -110,10 +110,9 @@ define void @foo_sext_v2i64_v2i32(ptr %dest, ptr %mask, ptr %src) {
 ; CHECK-LE-NEXT: ldrne r3, [r2]
 ; CHECK-LE-NEXT: vmovne.32 q1[0], r3
 ; CHECK-LE-NEXT: lsls r1, r1, #30
-; CHECK-LE-NEXT: ittt mi
+; CHECK-LE-NEXT: itt mi
 ; CHECK-LE-NEXT: ldrmi r1, [r2, #4]
 ; CHECK-LE-NEXT: vmovmi.32 q1[2], r1
-; CHECK-LE-NEXT: vmovmi.32 q1[3], r0
 ; CHECK-LE-NEXT: vmov r2, s6
 ; CHECK-LE-NEXT: movs r1, #0
 ; CHECK-LE-NEXT: vmov r3, s0
@@ -220,9 +219,9 @@ define void @foo_sext_v2i64_v2i32_unaligned(ptr %dest, ptr %mask, ptr %src) {
 ; CHECK-LE-NEXT: sub sp, #4
 ; CHECK-LE-NEXT: ldrd r12, lr, [r1]
 ; CHECK-LE-NEXT: movs r1, #0
-; CHECK-LE-NEXT: @ implicit-def: $q1
+; CHECK-LE-NEXT: @ implicit-def: $q0
 ; CHECK-LE-NEXT: rsbs.w r3, r12, #0
-; CHECK-LE-NEXT: vmov q0[2], q0[0], r12, lr
+; CHECK-LE-NEXT: vmov q1[2], q1[0], r12, lr
 ; CHECK-LE-NEXT: sbcs.w r3, r1, r12, asr #31
 ; CHECK-LE-NEXT: csetm r3, lt
 ; CHECK-LE-NEXT: rsbs.w r4, lr, #0
@@ -233,36 +232,35 @@ define void @foo_sext_v2i64_v2i32_unaligned(ptr %dest, ptr %mask, ptr %src) {
 ; CHECK-LE-NEXT: lsls r3, r1, #31
 ; CHECK-LE-NEXT: itt ne
 ; CHECK-LE-NEXT: ldrne r3, [r2]
-; CHECK-LE-NEXT: vmovne.32 q1[0], r3
+; CHECK-LE-NEXT: vmovne.32 q0[0], r3
 ; CHECK-LE-NEXT: lsls r1, r1, #30
-; CHECK-LE-NEXT: ittt mi
+; CHECK-LE-NEXT: itt mi
 ; CHECK-LE-NEXT: ldrmi r1, [r2, #4]
-; CHECK-LE-NEXT: vmovmi.32 q1[2], r1
-; CHECK-LE-NEXT: vmovmi.32 q1[3], r0
-; CHECK-LE-NEXT: vmov r2, s6
+; CHECK-LE-NEXT: vmovmi.32 q0[2], r1
+; CHECK-LE-NEXT: vmov r2, s2
 ; CHECK-LE-NEXT: movs r1, #0
-; CHECK-LE-NEXT: vmov r3, s0
-; CHECK-LE-NEXT: vmov r4, s4
-; CHECK-LE-NEXT: vmov q1[2], q1[0], r4, r2
+; CHECK-LE-NEXT: vmov r3, s4
+; CHECK-LE-NEXT: vmov r4, s0
+; CHECK-LE-NEXT: vmov q0[2], q0[0], r4, r2
 ; CHECK-LE-NEXT: rsbs r5, r3, #0
 ; CHECK-LE-NEXT: asr.w r12, r2, #31
 ; CHECK-LE-NEXT: sbcs.w r2, r1, r3, asr #31
-; CHECK-LE-NEXT: vmov r3, s2
+; CHECK-LE-NEXT: vmov r3, s6
 ; CHECK-LE-NEXT: csetm r2, lt
 ; CHECK-LE-NEXT: asr.w lr, r4, #31
-; CHECK-LE-NEXT: vmov q1[3], q1[1], lr, r12
+; CHECK-LE-NEXT: vmov q0[3], q0[1], lr, r12
 ; CHECK-LE-NEXT: rsbs r5, r3, #0
 ; CHECK-LE-NEXT: sbcs.w r3, r1, r3, asr #31
 ; CHECK-LE-NEXT: bfi r1, r2, #0, #1
 ; CHECK-LE-NEXT: csetm r2, lt
 ; CHECK-LE-NEXT: bfi r1, r2, #1, #1
 ; CHECK-LE-NEXT: lsls r2, r1, #31
 ; CHECK-LE-NEXT: itt ne
-; CHECK-LE-NEXT: vmovne r2, r3, d2
+; CHECK-LE-NEXT: vmovne r2, r3, d0
 ; CHECK-LE-NEXT: strdne r2, r3, [r0]
 ; CHECK-LE-NEXT: lsls r1, r1, #30
 ; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi r1, r2, d3
+; CHECK-LE-NEXT: vmovmi r1, r2, d1
 ; CHECK-LE-NEXT: strdmi r1, r2, [r0, #8]
 ; CHECK-LE-NEXT: add sp, #4
 ; CHECK-LE-NEXT: pop {r4, r5, r7, pc}
@@ -365,10 +363,9 @@ define void @foo_zext_v2i64_v2i32(ptr %dest, ptr %mask, ptr %src) {
 ; CHECK-LE-NEXT: ldrne r3, [r2]
 ; CHECK-LE-NEXT: vmovne.32 q0[0], r3
 ; CHECK-LE-NEXT: lsls r1, r1, #30
-; CHECK-LE-NEXT: ittt mi
+; CHECK-LE-NEXT: itt mi
 ; CHECK-LE-NEXT: ldrmi r1, [r2, #4]
 ; CHECK-LE-NEXT: vmovmi.32 q0[2], r1
-; CHECK-LE-NEXT: vmovmi.32 q0[3], r0
 ; CHECK-LE-NEXT: vmov r2, s4
 ; CHECK-LE-NEXT: movs r1, #0
 ; CHECK-LE-NEXT: vand q0, q0, q2
@@ -481,10 +478,9 @@ define void @foo_zext_v2i64_v2i32_unaligned(ptr %dest, ptr %mask, ptr %src) {
 ; CHECK-LE-NEXT: ldrne r3, [r2]
 ; CHECK-LE-NEXT: vmovne.32 q0[0], r3
 ; CHECK-LE-NEXT: lsls r1, r1, #30
-; CHECK-LE-NEXT: ittt mi
+; CHECK-LE-NEXT: itt mi
 ; CHECK-LE-NEXT: ldrmi r1, [r2, #4]
 ; CHECK-LE-NEXT: vmovmi.32 q0[2], r1
-; CHECK-LE-NEXT: vmovmi.32 q0[3], r0
 ; CHECK-LE-NEXT: vmov r2, s4
 ; CHECK-LE-NEXT: movs r1, #0
 ; CHECK-LE-NEXT: vand q0, q0, q2

llvm/test/CodeGen/X86/pr62286.ll

Lines changed: 8 additions & 6 deletions
@@ -28,8 +28,9 @@ define i64 @PR62286(i32 %a) {
 ; AVX1-NEXT: vmovd %edi, %xmm0
 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
 ; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
 ; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
 ; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
@@ -58,12 +59,13 @@ define i64 @PR62286(i32 %a) {
 ; AVX512-LABEL: PR62286:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vmovd %edi, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
-; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
-; AVX512-NEXT: movw $4369, %ax # imm = 0x1111
+; AVX512-NEXT: movb $8, %al
 ; AVX512-NEXT: kmovd %eax, %k1
-; AVX512-NEXT: vpaddd %zmm0, %zmm0, %zmm1 {%k1}
-; AVX512-NEXT: vpmovsxdq %ymm1, %zmm0
+; AVX512-NEXT: vpexpandd %ymm0, %ymm1 {%k1} {z}
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512-NEXT: vpaddd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX512-NEXT: vpmovsxdq %ymm0, %zmm0
 ; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1

llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-5.ll

Lines changed: 0 additions & 4 deletions
@@ -444,7 +444,6 @@ define void @store_i16_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512-NEXT: vpshufb %ymm4, %ymm2, %ymm2
 ; AVX512-NEXT: vpblendw {{.*#+}} ymm2 = ymm5[0,1],ymm2[2,3],ymm5[4,5,6],ymm2[7],ymm5[8,9],ymm2[10,11],ymm5[12,13,14],ymm2[15]
 ; AVX512-NEXT: vpternlogq {{.*#+}} ymm2 = ymm3 ^ (mem & (ymm2 ^ ymm3))
-; AVX512-NEXT: vinserti32x4 $2, %xmm0, %zmm2, %zmm2
 ; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; AVX512-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,3,1,3,4,5,6,7]
@@ -472,7 +471,6 @@ define void @store_i16_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512-FCP-NEXT: vpshufb %ymm4, %ymm2, %ymm2
 ; AVX512-FCP-NEXT: vpblendw {{.*#+}} ymm2 = ymm5[0,1],ymm2[2,3],ymm5[4,5,6],ymm2[7],ymm5[8,9],ymm2[10,11],ymm5[12,13,14],ymm2[15]
 ; AVX512-FCP-NEXT: vpternlogq {{.*#+}} ymm2 = ymm3 ^ (mem & (ymm2 ^ ymm3))
-; AVX512-FCP-NEXT: vinserti32x4 $2, %xmm0, %zmm2, %zmm2
 ; AVX512-FCP-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
 ; AVX512-FCP-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,6,7,14,15,u,u,8,9,10,11,12,13,14,15]
 ; AVX512-FCP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
@@ -499,7 +497,6 @@ define void @store_i16_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-NEXT: vpshufb %ymm4, %ymm2, %ymm2
 ; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm2 = ymm5[0,1],ymm2[2,3],ymm5[4,5,6],ymm2[7],ymm5[8,9],ymm2[10,11],ymm5[12,13,14],ymm2[15]
 ; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm2 = ymm3 ^ (mem & (ymm2 ^ ymm3))
-; AVX512DQ-NEXT: vinserti32x4 $2, %xmm0, %zmm2, %zmm2
 ; AVX512DQ-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,3,1,3,4,5,6,7]
@@ -527,7 +524,6 @@ define void @store_i16_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-FCP-NEXT: vpshufb %ymm4, %ymm2, %ymm2
 ; AVX512DQ-FCP-NEXT: vpblendw {{.*#+}} ymm2 = ymm5[0,1],ymm2[2,3],ymm5[4,5,6],ymm2[7],ymm5[8,9],ymm2[10,11],ymm5[12,13,14],ymm2[15]
 ; AVX512DQ-FCP-NEXT: vpternlogq {{.*#+}} ymm2 = ymm3 ^ (mem & (ymm2 ^ ymm3))
-; AVX512DQ-FCP-NEXT: vinserti32x4 $2, %xmm0, %zmm2, %zmm2
 ; AVX512DQ-FCP-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,6,7,14,15,u,u,8,9,10,11,12,13,14,15]
 ; AVX512DQ-FCP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
