@@ -233,6 +233,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
233
233
bool HasAVX2 = ST->hasAVX2 ();
234
234
bool HasDQI = ST->hasDQI ();
235
235
bool HasBWI = ST->hasBWI ();
236
+ bool HasVLX = ST->hasVLX ();
236
237
237
238
auto ConvertToBroadcast = [&](unsigned OpBcst256, unsigned OpBcst128,
238
239
unsigned OpBcst64, unsigned OpBcst32,
@@ -352,20 +353,22 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
352
353
1 );
353
354
}
354
355
355
- // Attempt to find a AVX512 mapping from a full width memory-fold instruction
356
- // to a broadcast-fold instruction variant.
357
- if ((MI.getDesc ().TSFlags & X86II::EncodingMask) == X86II::EVEX) {
356
+ auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc32, unsigned OpSrc64) {
358
357
unsigned OpBcst32 = 0 , OpBcst64 = 0 ;
359
358
unsigned OpNoBcst32 = 0 , OpNoBcst64 = 0 ;
360
- if (const X86MemoryFoldTableEntry *Mem2Bcst =
361
- llvm::lookupBroadcastFoldTable (Opc, 32 )) {
362
- OpBcst32 = Mem2Bcst->DstOp ;
363
- OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
359
+ if (OpSrc32) {
360
+ if (const X86MemoryFoldTableEntry *Mem2Bcst =
361
+ llvm::lookupBroadcastFoldTable (OpSrc32, 32 )) {
362
+ OpBcst32 = Mem2Bcst->DstOp ;
363
+ OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
364
+ }
364
365
}
365
- if (const X86MemoryFoldTableEntry *Mem2Bcst =
366
- llvm::lookupBroadcastFoldTable (Opc, 64 )) {
367
- OpBcst64 = Mem2Bcst->DstOp ;
368
- OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
366
+ if (OpSrc64) {
367
+ if (const X86MemoryFoldTableEntry *Mem2Bcst =
368
+ llvm::lookupBroadcastFoldTable (OpSrc64, 64 )) {
369
+ OpBcst64 = Mem2Bcst->DstOp ;
370
+ OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
371
+ }
369
372
}
370
373
assert (((OpBcst32 == 0 ) || (OpBcst64 == 0 ) || (OpNoBcst32 == OpNoBcst64)) &&
371
374
" OperandNo mismatch" );
@@ -374,6 +377,70 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
374
377
unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
375
378
return ConvertToBroadcast (0 , 0 , OpBcst64, OpBcst32, 0 , 0 , OpNo);
376
379
}
380
+ return false ;
381
+ };
382
+
383
+ // Attempt to find an AVX512 mapping from a full width memory-fold instruction
384
+ // to a broadcast-fold instruction variant.
385
+ if ((MI.getDesc ().TSFlags & X86II::EncodingMask) == X86II::EVEX)
386
+ return ConvertToBroadcastAVX512 (Opc, Opc);
387
+
388
+ // Reverse the X86InstrInfo::setExecutionDomainCustom EVEX->VEX logic
389
+ // conversion to see if we can convert to a broadcasted (integer) logic op.
390
+ if (HasVLX && !HasDQI) {
391
+ unsigned OpSrc32 = 0 , OpSrc64 = 0 ;
392
+ switch (Opc) {
393
+ case X86::VANDPDrm:
394
+ case X86::VANDPSrm:
395
+ case X86::VPANDrm:
396
+ OpSrc32 = X86 ::VPANDDZ128rm;
397
+ OpSrc64 = X86 ::VPANDQZ128rm;
398
+ break ;
399
+ case X86::VANDPDYrm:
400
+ case X86::VANDPSYrm:
401
+ case X86::VPANDYrm:
402
+ OpSrc32 = X86 ::VPANDDZ256rm;
403
+ OpSrc64 = X86 ::VPANDQZ256rm;
404
+ break ;
405
+ case X86::VANDNPDrm:
406
+ case X86::VANDNPSrm:
407
+ case X86::VPANDNrm:
408
+ OpSrc32 = X86 ::VPANDNDZ128rm;
409
+ OpSrc64 = X86 ::VPANDNQZ128rm;
410
+ break ;
411
+ case X86::VANDNPDYrm:
412
+ case X86::VANDNPSYrm:
413
+ case X86::VPANDNYrm:
414
+ OpSrc32 = X86 ::VPANDNDZ256rm;
415
+ OpSrc64 = X86 ::VPANDNQZ256rm;
416
+ break ;
417
+ case X86::VORPDrm:
418
+ case X86::VORPSrm:
419
+ case X86::VPORrm:
420
+ OpSrc32 = X86 ::VPORDZ128rm;
421
+ OpSrc64 = X86 ::VPORQZ128rm;
422
+ break ;
423
+ case X86::VORPDYrm:
424
+ case X86::VORPSYrm:
425
+ case X86::VPORYrm:
426
+ OpSrc32 = X86 ::VPORDZ256rm;
427
+ OpSrc64 = X86 ::VPORQZ256rm;
428
+ break ;
429
+ case X86::VXORPDrm:
430
+ case X86::VXORPSrm:
431
+ case X86::VPXORrm:
432
+ OpSrc32 = X86 ::VPXORDZ128rm;
433
+ OpSrc64 = X86 ::VPXORQZ128rm;
434
+ break ;
435
+ case X86::VXORPDYrm:
436
+ case X86::VXORPSYrm:
437
+ case X86::VPXORYrm:
438
+ OpSrc32 = X86 ::VPXORDZ256rm;
439
+ OpSrc64 = X86 ::VPXORQZ256rm;
440
+ break ;
441
+ }
442
+ if (OpSrc32 || OpSrc64)
443
+ return ConvertToBroadcastAVX512 (OpSrc32, OpSrc64);
377
444
}
378
445
379
446
return false ;
0 commit comments