@@ -407,6 +407,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
407407 {X86::VMOVSDZrm, 1 , 64 , rebuildZeroUpperCst},
408408 {X86::VMOVDDUPZ128rm, 1 , 64 , rebuildSplatCst}},
409409 128 , 1 );
410+ case X86::VMOVAPDZ128rmk:
411+ case X86::VMOVUPDZ128rmk:
412+ return FixupConstant ({{X86::VMOVSDZrmk, 1 , 64 , rebuildZeroUpperCst},
413+ {X86::VMOVDDUPZ128rmk, 1 , 64 , rebuildSplatCst}},
414+ 128 , 3 );
415+ case X86::VMOVAPDZ128rmkz:
416+ case X86::VMOVUPDZ128rmkz:
417+ return FixupConstant ({{X86::VMOVSDZrmkz, 1 , 64 , rebuildZeroUpperCst},
418+ {X86::VMOVDDUPZ128rmkz, 1 , 64 , rebuildSplatCst}},
419+ 128 , 2 );
420+ case X86::VMOVAPSZ128rmk:
421+ case X86::VMOVUPSZ128rmk:
422+ return FixupConstant ({{X86::VMOVSSZrmk, 1 , 32 , rebuildZeroUpperCst},
423+ {X86::VBROADCASTSSZ128rmk, 1 , 32 , rebuildSplatCst}},
424+ 128 , 3 );
425+ case X86::VMOVAPSZ128rmkz:
426+ case X86::VMOVUPSZ128rmkz:
427+ return FixupConstant ({{X86::VMOVSSZrmkz, 1 , 32 , rebuildZeroUpperCst},
428+ {X86::VBROADCASTSSZ128rmkz, 1 , 32 , rebuildSplatCst}},
429+ 128 , 2 );
410430 case X86::VMOVAPDZ256rm:
411431 case X86::VMOVAPSZ256rm:
412432 case X86::VMOVUPDZ256rm:
@@ -416,6 +436,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
416436 {X86::VBROADCASTSDZ256rm, 1 , 64 , rebuildSplatCst},
417437 {X86::VBROADCASTF32X4Z256rm, 1 , 128 , rebuildSplatCst}},
418438 256 , 1 );
439+ case X86::VMOVAPDZ256rmk:
440+ case X86::VMOVUPDZ256rmk:
441+ return FixupConstant ({{X86::VBROADCASTSDZ256rmk, 1 , 64 , rebuildSplatCst}},
442+ 256 , 3 );
443+ case X86::VMOVAPDZ256rmkz:
444+ case X86::VMOVUPDZ256rmkz:
445+ return FixupConstant ({{X86::VBROADCASTSDZ256rmkz, 1 , 64 , rebuildSplatCst}},
446+ 256 , 2 );
447+ case X86::VMOVAPSZ256rmk:
448+ case X86::VMOVUPSZ256rmk:
449+ return FixupConstant (
450+ {{X86::VBROADCASTSSZ256rmk, 1 , 32 , rebuildSplatCst},
451+ {X86::VBROADCASTF32X4Z256rmk, 1 , 128 , rebuildSplatCst}},
452+ 256 , 3 );
453+ case X86::VMOVAPSZ256rmkz:
454+ case X86::VMOVUPSZ256rmkz:
455+ return FixupConstant (
456+ {{X86::VBROADCASTSSZ256rmkz, 1 , 32 , rebuildSplatCst},
457+ {X86::VBROADCASTF32X4Z256rmkz, 1 , 128 , rebuildSplatCst}},
458+ 256 , 2 );
419459 case X86::VMOVAPDZrm:
420460 case X86::VMOVAPSZrm:
421461 case X86::VMOVUPDZrm:
@@ -425,6 +465,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
425465 {X86::VBROADCASTF32X4rm, 1 , 128 , rebuildSplatCst},
426466 {X86::VBROADCASTF64X4rm, 1 , 256 , rebuildSplatCst}},
427467 512 , 1 );
468+ case X86::VMOVAPDZrmk:
469+ case X86::VMOVUPDZrmk:
470+ return FixupConstant ({{X86::VBROADCASTSDZrmk, 1 , 64 , rebuildSplatCst},
471+ {X86::VBROADCASTF64X4rmk, 1 , 256 , rebuildSplatCst}},
472+ 512 , 3 );
473+ case X86::VMOVAPDZrmkz:
474+ case X86::VMOVUPDZrmkz:
475+ return FixupConstant ({{X86::VBROADCASTSDZrmkz, 1 , 64 , rebuildSplatCst},
476+ {X86::VBROADCASTF64X4rmkz, 1 , 256 , rebuildSplatCst}},
477+ 512 , 2 );
478+ case X86::VMOVAPSZrmk:
479+ case X86::VMOVUPSZrmk:
480+ return FixupConstant ({{X86::VBROADCASTSSZrmk, 1 , 32 , rebuildSplatCst},
481+ {X86::VBROADCASTF32X4rmk, 1 , 128 , rebuildSplatCst}},
482+ 512 , 3 );
483+ case X86::VMOVAPSZrmkz:
484+ case X86::VMOVUPSZrmkz:
485+ return FixupConstant ({{X86::VBROADCASTSSZrmkz, 1 , 32 , rebuildSplatCst},
486+ {X86::VBROADCASTF32X4rmkz, 1 , 128 , rebuildSplatCst}},
487+ 512 , 2 );
428488 /* Integer Loads */
429489 case X86::MOVDQArm:
430490 case X86::MOVDQUrm: {
@@ -520,6 +580,42 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
520580 {X86::VPMOVZXDQZ128rm, 2 , 32 , rebuildZExtCst}};
521581 return FixupConstant (Fixups, 128 , 1 );
522582 }
583+ case X86::VMOVDQA32Z128rmk:
584+ case X86::VMOVDQU32Z128rmk:
585+ return FixupConstant ({{X86::VPBROADCASTDZ128rmk, 1 , 32 , rebuildSplatCst},
586+ {X86::VPMOVSXBDZ128rmk, 4 , 8 , rebuildSExtCst},
587+ {X86::VPMOVZXBDZ128rmk, 4 , 8 , rebuildZExtCst},
588+ {X86::VPMOVSXWDZ128rmk, 4 , 16 , rebuildSExtCst},
589+ {X86::VPMOVZXWDZ128rmk, 4 , 16 , rebuildZExtCst}},
590+ 128 , 3 );
591+ case X86::VMOVDQA32Z128rmkz:
592+ case X86::VMOVDQU32Z128rmkz:
593+ return FixupConstant ({{X86::VPBROADCASTDZ128rmkz, 1 , 32 , rebuildSplatCst},
594+ {X86::VPMOVSXBDZ128rmkz, 4 , 8 , rebuildSExtCst},
595+ {X86::VPMOVZXBDZ128rmkz, 4 , 8 , rebuildZExtCst},
596+ {X86::VPMOVSXWDZ128rmkz, 4 , 16 , rebuildSExtCst},
597+ {X86::VPMOVZXWDZ128rmkz, 4 , 16 , rebuildZExtCst}},
598+ 128 , 2 );
599+ case X86::VMOVDQA64Z128rmk:
600+ case X86::VMOVDQU64Z128rmk:
601+ return FixupConstant ({{X86::VPMOVSXBQZ128rmk, 2 , 8 , rebuildSExtCst},
602+ {X86::VPMOVZXBQZ128rmk, 2 , 8 , rebuildZExtCst},
603+ {X86::VPMOVSXWQZ128rmk, 2 , 16 , rebuildSExtCst},
604+ {X86::VPMOVZXWQZ128rmk, 2 , 16 , rebuildZExtCst},
605+ {X86::VPBROADCASTQZ128rmk, 1 , 64 , rebuildSplatCst},
606+ {X86::VPMOVSXDQZ128rmk, 2 , 32 , rebuildSExtCst},
607+ {X86::VPMOVZXDQZ128rmk, 2 , 32 , rebuildZExtCst}},
608+ 128 , 3 );
609+ case X86::VMOVDQA64Z128rmkz:
610+ case X86::VMOVDQU64Z128rmkz:
611+ return FixupConstant ({{X86::VPMOVSXBQZ128rmkz, 2 , 8 , rebuildSExtCst},
612+ {X86::VPMOVZXBQZ128rmkz, 2 , 8 , rebuildZExtCst},
613+ {X86::VPMOVSXWQZ128rmkz, 2 , 16 , rebuildSExtCst},
614+ {X86::VPMOVZXWQZ128rmkz, 2 , 16 , rebuildZExtCst},
615+ {X86::VPBROADCASTQZ128rmkz, 1 , 64 , rebuildSplatCst},
616+ {X86::VPMOVSXDQZ128rmkz, 2 , 32 , rebuildSExtCst},
617+ {X86::VPMOVZXDQZ128rmkz, 2 , 32 , rebuildZExtCst}},
618+ 128 , 2 );
523619 case X86::VMOVDQA32Z256rm:
524620 case X86::VMOVDQA64Z256rm:
525621 case X86::VMOVDQU32Z256rm:
@@ -544,6 +640,46 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
544640 {X86::VPMOVZXDQZ256rm, 4 , 32 , rebuildZExtCst}};
545641 return FixupConstant (Fixups, 256 , 1 );
546642 }
643+ case X86::VMOVDQA32Z256rmk:
644+ case X86::VMOVDQU32Z256rmk:
645+ return FixupConstant (
646+ {{X86::VPBROADCASTDZ256rmk, 1 , 32 , rebuildSplatCst},
647+ {X86::VPMOVSXBDZ256rmk, 8 , 8 , rebuildSExtCst},
648+ {X86::VPMOVZXBDZ256rmk, 8 , 8 , rebuildZExtCst},
649+ {X86::VBROADCASTI32X4Z256rmk, 1 , 128 , rebuildSplatCst},
650+ {X86::VPMOVSXWDZ256rmk, 8 , 16 , rebuildSExtCst},
651+ {X86::VPMOVZXWDZ256rmk, 8 , 16 , rebuildZExtCst}},
652+ 256 , 3 );
653+ case X86::VMOVDQA32Z256rmkz:
654+ case X86::VMOVDQU32Z256rmkz:
655+ return FixupConstant (
656+ {{X86::VPBROADCASTDZ256rmkz, 1 , 32 , rebuildSplatCst},
657+ {X86::VPMOVSXBDZ256rmkz, 8 , 8 , rebuildSExtCst},
658+ {X86::VPMOVZXBDZ256rmkz, 8 , 8 , rebuildZExtCst},
659+ {X86::VBROADCASTI32X4Z256rmkz, 1 , 128 , rebuildSplatCst},
660+ {X86::VPMOVSXWDZ256rmkz, 8 , 16 , rebuildSExtCst},
661+ {X86::VPMOVZXWDZ256rmkz, 8 , 16 , rebuildZExtCst}},
662+ 256 , 2 );
663+ case X86::VMOVDQA64Z256rmk:
664+ case X86::VMOVDQU64Z256rmk:
665+ return FixupConstant ({{X86::VPMOVSXBQZ256rmk, 4 , 8 , rebuildSExtCst},
666+ {X86::VPMOVZXBQZ256rmk, 4 , 8 , rebuildZExtCst},
667+ {X86::VPBROADCASTQZ256rmk, 1 , 64 , rebuildSplatCst},
668+ {X86::VPMOVSXWQZ256rmk, 4 , 16 , rebuildSExtCst},
669+ {X86::VPMOVZXWQZ256rmk, 4 , 16 , rebuildZExtCst},
670+ {X86::VPMOVSXDQZ256rmk, 4 , 32 , rebuildSExtCst},
671+ {X86::VPMOVZXDQZ256rmk, 4 , 32 , rebuildZExtCst}},
672+ 256 , 3 );
673+ case X86::VMOVDQA64Z256rmkz:
674+ case X86::VMOVDQU64Z256rmkz:
675+ return FixupConstant ({{X86::VPMOVSXBQZ256rmkz, 4 , 8 , rebuildSExtCst},
676+ {X86::VPMOVZXBQZ256rmkz, 4 , 8 , rebuildZExtCst},
677+ {X86::VPBROADCASTQZ256rmkz, 1 , 64 , rebuildSplatCst},
678+ {X86::VPMOVSXWQZ256rmkz, 4 , 16 , rebuildSExtCst},
679+ {X86::VPMOVZXWQZ256rmkz, 4 , 16 , rebuildZExtCst},
680+ {X86::VPMOVSXDQZ256rmkz, 4 , 32 , rebuildSExtCst},
681+ {X86::VPMOVZXDQZ256rmkz, 4 , 32 , rebuildZExtCst}},
682+ 256 , 2 );
547683 case X86::VMOVDQA32Zrm:
548684 case X86::VMOVDQA64Zrm:
549685 case X86::VMOVDQU32Zrm:
@@ -569,43 +705,93 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
569705 {X86::VPMOVZXDQZrm, 8 , 32 , rebuildZExtCst}};
570706 return FixupConstant (Fixups, 512 , 1 );
571707 }
708+ case X86::VMOVDQA32Zrmk:
709+ case X86::VMOVDQU32Zrmk:
710+ return FixupConstant ({{X86::VPBROADCASTDZrmk, 1 , 32 , rebuildSplatCst},
711+ {X86::VBROADCASTI32X4rmk, 1 , 128 , rebuildSplatCst},
712+ {X86::VPMOVSXBDZrmk, 16 , 8 , rebuildSExtCst},
713+ {X86::VPMOVZXBDZrmk, 16 , 8 , rebuildZExtCst},
714+ {X86::VPMOVSXWDZrmk, 16 , 16 , rebuildSExtCst},
715+ {X86::VPMOVZXWDZrmk, 16 , 16 , rebuildZExtCst}},
716+ 512 , 3 );
717+ case X86::VMOVDQA32Zrmkz:
718+ case X86::VMOVDQU32Zrmkz:
719+ return FixupConstant ({{X86::VPBROADCASTDZrmkz, 1 , 32 , rebuildSplatCst},
720+ {X86::VBROADCASTI32X4rmkz, 1 , 128 , rebuildSplatCst},
721+ {X86::VPMOVSXBDZrmkz, 16 , 8 , rebuildSExtCst},
722+ {X86::VPMOVZXBDZrmkz, 16 , 8 , rebuildZExtCst},
723+ {X86::VPMOVSXWDZrmkz, 16 , 16 , rebuildSExtCst},
724+ {X86::VPMOVZXWDZrmkz, 16 , 16 , rebuildZExtCst}},
725+ 512 , 2 );
726+ case X86::VMOVDQA64Zrmk:
727+ case X86::VMOVDQU64Zrmk:
728+ return FixupConstant ({{X86::VPBROADCASTQZrmk, 1 , 64 , rebuildSplatCst},
729+ {X86::VPMOVSXBQZrmk, 8 , 8 , rebuildSExtCst},
730+ {X86::VPMOVZXBQZrmk, 8 , 8 , rebuildZExtCst},
731+ {X86::VPMOVSXWQZrmk, 8 , 16 , rebuildSExtCst},
732+ {X86::VPMOVZXWQZrmk, 8 , 16 , rebuildZExtCst},
733+ {X86::VBROADCASTI64X4rmk, 1 , 256 , rebuildSplatCst},
734+ {X86::VPMOVSXDQZrmk, 8 , 32 , rebuildSExtCst},
735+ {X86::VPMOVZXDQZrmk, 8 , 32 , rebuildZExtCst}},
736+ 512 , 3 );
737+ case X86::VMOVDQA64Zrmkz:
738+ case X86::VMOVDQU64Zrmkz:
739+ return FixupConstant ({{X86::VPBROADCASTQZrmkz, 1 , 64 , rebuildSplatCst},
740+ {X86::VPMOVSXBQZrmkz, 8 , 8 , rebuildSExtCst},
741+ {X86::VPMOVZXBQZrmkz, 8 , 8 , rebuildZExtCst},
742+ {X86::VPMOVSXWQZrmkz, 8 , 16 , rebuildSExtCst},
743+ {X86::VPMOVZXWQZrmkz, 8 , 16 , rebuildZExtCst},
744+ {X86::VBROADCASTI64X4rmkz, 1 , 256 , rebuildSplatCst},
745+ {X86::VPMOVSXDQZrmkz, 8 , 32 , rebuildSExtCst},
746+ {X86::VPMOVZXDQZrmkz, 8 , 32 , rebuildZExtCst}},
747+ 512 , 2 );
572748 }
573749
574- auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc32, unsigned OpSrc64) {
575- unsigned OpBcst32 = 0 , OpBcst64 = 0 ;
576- unsigned OpNoBcst32 = 0 , OpNoBcst64 = 0 ;
750+ auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc16, unsigned OpSrc32,
751+ unsigned OpSrc64) {
752+ if (OpSrc16) {
753+ if (const X86FoldTableEntry *Mem2Bcst =
754+ llvm::lookupBroadcastFoldTableBySize (OpSrc16, 16 )) {
755+ unsigned OpBcst16 = Mem2Bcst->DstOp ;
756+ unsigned OpNoBcst16 = Mem2Bcst->Flags & TB_INDEX_MASK;
757+ FixupEntry Fixups[] = {{(int )OpBcst16, 1 , 16 , rebuildSplatCst}};
758+ // TODO: Add support for RegBitWidth, but currently rebuildSplatCst
759+ // doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
760+ if (FixupConstant (Fixups, 0 , OpNoBcst16))
761+ return true ;
762+ }
763+ }
577764 if (OpSrc32) {
578765 if (const X86FoldTableEntry *Mem2Bcst =
579766 llvm::lookupBroadcastFoldTableBySize (OpSrc32, 32 )) {
580- OpBcst32 = Mem2Bcst->DstOp ;
581- OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
767+ unsigned OpBcst32 = Mem2Bcst->DstOp ;
768+ unsigned OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
769+ FixupEntry Fixups[] = {{(int )OpBcst32, 1 , 32 , rebuildSplatCst}};
770+ // TODO: Add support for RegBitWidth, but currently rebuildSplatCst
771+ // doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
772+ if (FixupConstant (Fixups, 0 , OpNoBcst32))
773+ return true ;
582774 }
583775 }
584776 if (OpSrc64) {
585777 if (const X86FoldTableEntry *Mem2Bcst =
586778 llvm::lookupBroadcastFoldTableBySize (OpSrc64, 64 )) {
587- OpBcst64 = Mem2Bcst->DstOp ;
588- OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
779+ unsigned OpBcst64 = Mem2Bcst->DstOp ;
780+ unsigned OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
781+ FixupEntry Fixups[] = {{(int )OpBcst64, 1 , 64 , rebuildSplatCst}};
782+ // TODO: Add support for RegBitWidth, but currently rebuildSplatCst
783+ // doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
784+ if (FixupConstant (Fixups, 0 , OpNoBcst64))
785+ return true ;
589786 }
590787 }
591- assert (((OpBcst32 == 0 ) || (OpBcst64 == 0 ) || (OpNoBcst32 == OpNoBcst64)) &&
592- " OperandNo mismatch" );
593-
594- if (OpBcst32 || OpBcst64) {
595- unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
596- FixupEntry Fixups[] = {{(int )OpBcst32, 32 , 32 , rebuildSplatCst},
597- {(int )OpBcst64, 64 , 64 , rebuildSplatCst}};
598- // TODO: Add support for RegBitWidth, but currently rebuildSplatCst
599- // doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
600- return FixupConstant (Fixups, 0 , OpNo);
601- }
602788 return false ;
603789 };
604790
605791 // Attempt to find a AVX512 mapping from a full width memory-fold instruction
606792 // to a broadcast-fold instruction variant.
607793 if ((MI.getDesc ().TSFlags & X86II::EncodingMask) == X86II::EVEX)
608- return ConvertToBroadcastAVX512 (Opc, Opc);
794+ return ConvertToBroadcastAVX512 (Opc, Opc, Opc );
609795
610796 // Reverse the X86InstrInfo::setExecutionDomainCustom EVEX->VEX logic
611797 // conversion to see if we can convert to a broadcasted (integer) logic op.
@@ -662,7 +848,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
662848 break ;
663849 }
664850 if (OpSrc32 || OpSrc64)
665- return ConvertToBroadcastAVX512 (OpSrc32, OpSrc64);
851+ return ConvertToBroadcastAVX512 (0 , OpSrc32, OpSrc64);
666852 }
667853
668854 return false ;
0 commit comments