Skip to content

Commit 1552b91

Browse files
committed
[X86] X86FixupVectorConstantsPass - attempt to match VEX logic ops back to EVEX if we can create a broadcast fold
On non-DQI AVX512 targets, X86InstrInfo::setExecutionDomainCustom will convert EVEX int-domain instructions to VEX fp-domain instructions. However, if we have the chance to use a broadcast fold, we're better off using an EVEX instruction, so handle the reverse fold.
1 parent f802fed commit 1552b91

File tree

4 files changed

+120
-23
lines changed

4 files changed

+120
-23
lines changed

llvm/lib/Target/X86/X86FixupVectorConstants.cpp

Lines changed: 78 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
233233
bool HasAVX2 = ST->hasAVX2();
234234
bool HasDQI = ST->hasDQI();
235235
bool HasBWI = ST->hasBWI();
236+
bool HasVLX = ST->hasVLX();
236237

237238
auto ConvertToBroadcast = [&](unsigned OpBcst256, unsigned OpBcst128,
238239
unsigned OpBcst64, unsigned OpBcst32,
@@ -352,20 +353,22 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
352353
1);
353354
}
354355

355-
// Attempt to find a AVX512 mapping from a full width memory-fold instruction
356-
// to a broadcast-fold instruction variant.
357-
if ((MI.getDesc().TSFlags & X86II::EncodingMask) == X86II::EVEX) {
356+
auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc32, unsigned OpSrc64) {
358357
unsigned OpBcst32 = 0, OpBcst64 = 0;
359358
unsigned OpNoBcst32 = 0, OpNoBcst64 = 0;
360-
if (const X86MemoryFoldTableEntry *Mem2Bcst =
361-
llvm::lookupBroadcastFoldTable(Opc, 32)) {
362-
OpBcst32 = Mem2Bcst->DstOp;
363-
OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
359+
if (OpSrc32) {
360+
if (const X86MemoryFoldTableEntry *Mem2Bcst =
361+
llvm::lookupBroadcastFoldTable(OpSrc32, 32)) {
362+
OpBcst32 = Mem2Bcst->DstOp;
363+
OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
364+
}
364365
}
365-
if (const X86MemoryFoldTableEntry *Mem2Bcst =
366-
llvm::lookupBroadcastFoldTable(Opc, 64)) {
367-
OpBcst64 = Mem2Bcst->DstOp;
368-
OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
366+
if (OpSrc64) {
367+
if (const X86MemoryFoldTableEntry *Mem2Bcst =
368+
llvm::lookupBroadcastFoldTable(OpSrc64, 64)) {
369+
OpBcst64 = Mem2Bcst->DstOp;
370+
OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
371+
}
369372
}
370373
assert(((OpBcst32 == 0) || (OpBcst64 == 0) || (OpNoBcst32 == OpNoBcst64)) &&
371374
"OperandNo mismatch");
@@ -374,6 +377,70 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
374377
unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
375378
return ConvertToBroadcast(0, 0, OpBcst64, OpBcst32, 0, 0, OpNo);
376379
}
380+
return false;
381+
};
382+
383+
// Attempt to find a AVX512 mapping from a full width memory-fold instruction
384+
// to a broadcast-fold instruction variant.
385+
if ((MI.getDesc().TSFlags & X86II::EncodingMask) == X86II::EVEX)
386+
return ConvertToBroadcastAVX512(Opc, Opc);
387+
388+
// Reverse the X86InstrInfo::setExecutionDomainCustom EVEX->VEX logic
389+
// conversion to see if we can convert to a broadcasted (integer) logic op.
390+
if (HasVLX && !HasDQI) {
391+
unsigned OpSrc32 = 0, OpSrc64 = 0;
392+
switch (Opc) {
393+
case X86::VANDPDrm:
394+
case X86::VANDPSrm:
395+
case X86::VPANDrm:
396+
OpSrc32 = X86 ::VPANDDZ128rm;
397+
OpSrc64 = X86 ::VPANDQZ128rm;
398+
break;
399+
case X86::VANDPDYrm:
400+
case X86::VANDPSYrm:
401+
case X86::VPANDYrm:
402+
OpSrc32 = X86 ::VPANDDZ256rm;
403+
OpSrc64 = X86 ::VPANDQZ256rm;
404+
break;
405+
case X86::VANDNPDrm:
406+
case X86::VANDNPSrm:
407+
case X86::VPANDNrm:
408+
OpSrc32 = X86 ::VPANDNDZ128rm;
409+
OpSrc64 = X86 ::VPANDNQZ128rm;
410+
break;
411+
case X86::VANDNPDYrm:
412+
case X86::VANDNPSYrm:
413+
case X86::VPANDNYrm:
414+
OpSrc32 = X86 ::VPANDNDZ256rm;
415+
OpSrc64 = X86 ::VPANDNQZ256rm;
416+
break;
417+
case X86::VORPDrm:
418+
case X86::VORPSrm:
419+
case X86::VPORrm:
420+
OpSrc32 = X86 ::VPORDZ128rm;
421+
OpSrc64 = X86 ::VPORQZ128rm;
422+
break;
423+
case X86::VORPDYrm:
424+
case X86::VORPSYrm:
425+
case X86::VPORYrm:
426+
OpSrc32 = X86 ::VPORDZ256rm;
427+
OpSrc64 = X86 ::VPORQZ256rm;
428+
break;
429+
case X86::VXORPDrm:
430+
case X86::VXORPSrm:
431+
case X86::VPXORrm:
432+
OpSrc32 = X86 ::VPXORDZ128rm;
433+
OpSrc64 = X86 ::VPXORQZ128rm;
434+
break;
435+
case X86::VXORPDYrm:
436+
case X86::VXORPSYrm:
437+
case X86::VPXORYrm:
438+
OpSrc32 = X86 ::VPXORDZ256rm;
439+
OpSrc64 = X86 ::VPXORQZ256rm;
440+
break;
441+
}
442+
if (OpSrc32 || OpSrc64)
443+
return ConvertToBroadcastAVX512(OpSrc32, OpSrc64);
377444
}
378445

379446
return false;

llvm/test/CodeGen/X86/combine-abs.ll

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -164,10 +164,20 @@ define <16 x i8> @combine_v16i8_abs_constant(<16 x i8> %a) {
164164
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
165165
; SSE-NEXT: retq
166166
;
167-
; AVX-LABEL: combine_v16i8_abs_constant:
168-
; AVX: # %bb.0:
169-
; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
170-
; AVX-NEXT: retq
167+
; AVX2-LABEL: combine_v16i8_abs_constant:
168+
; AVX2: # %bb.0:
169+
; AVX2-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
170+
; AVX2-NEXT: retq
171+
;
172+
; AVX512F-LABEL: combine_v16i8_abs_constant:
173+
; AVX512F: # %bb.0:
174+
; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
175+
; AVX512F-NEXT: retq
176+
;
177+
; AVX512VL-LABEL: combine_v16i8_abs_constant:
178+
; AVX512VL: # %bb.0:
179+
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
180+
; AVX512VL-NEXT: retq
171181
%1 = insertelement <16 x i8> undef, i8 15, i32 0
172182
%2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
173183
%3 = and <16 x i8> %a, %2

llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -711,10 +711,15 @@ define <16 x i8> @shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz(
711711
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
712712
; SSE-NEXT: retq
713713
;
714-
; AVX-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
715-
; AVX: # %bb.0:
716-
; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
717-
; AVX-NEXT: retq
714+
; AVX1OR2-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
715+
; AVX1OR2: # %bb.0:
716+
; AVX1OR2-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
717+
; AVX1OR2-NEXT: retq
718+
;
719+
; AVX512VL-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
720+
; AVX512VL: # %bb.0:
721+
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
722+
; AVX512VL-NEXT: retq
718723
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
719724
ret <16 x i8> %shuffle
720725
}

llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2362,10 +2362,25 @@ define <32 x i8> @load_fold_pblendvb_commute(ptr %px, <32 x i8> %y) {
23622362
}
23632363

23642364
define <32 x i8> @shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31(<32 x i8> %a) {
2365-
; ALL-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31:
2366-
; ALL: # %bb.0:
2367-
; ALL-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
2368-
; ALL-NEXT: retq
2365+
; AVX1-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31:
2366+
; AVX1: # %bb.0:
2367+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
2368+
; AVX1-NEXT: retq
2369+
;
2370+
; AVX2-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31:
2371+
; AVX2: # %bb.0:
2372+
; AVX2-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
2373+
; AVX2-NEXT: retq
2374+
;
2375+
; AVX512VL-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31:
2376+
; AVX512VL: # %bb.0:
2377+
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
2378+
; AVX512VL-NEXT: retq
2379+
;
2380+
; XOP-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31:
2381+
; XOP: # %bb.0:
2382+
; XOP-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
2383+
; XOP-NEXT: retq
23692384
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
23702385
ret <32 x i8> %shuffle
23712386
}

0 commit comments

Comments
 (0)