@@ -424,6 +424,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
424424 {X86::VMOVSDZrm, 1 , 64 , rebuildZeroUpperCst},
425425 {X86::VMOVDDUPZ128rm, 1 , 64 , rebuildSplatCst}},
426426 128 , 1 );
427+ case X86::VMOVAPDZ128rmk:
428+ case X86::VMOVUPDZ128rmk:
429+ return FixupConstant ({{X86::VMOVSDZrmk, 1 , 64 , rebuildZeroUpperCst},
430+ {X86::VMOVDDUPZ128rmk, 1 , 64 , rebuildSplatCst}},
431+ 128 , 3 );
432+ case X86::VMOVAPDZ128rmkz:
433+ case X86::VMOVUPDZ128rmkz:
434+ return FixupConstant ({{X86::VMOVSDZrmkz, 1 , 64 , rebuildZeroUpperCst},
435+ {X86::VMOVDDUPZ128rmkz, 1 , 64 , rebuildSplatCst}},
436+ 128 , 2 );
437+ case X86::VMOVAPSZ128rmk:
438+ case X86::VMOVUPSZ128rmk:
439+ return FixupConstant ({{X86::VMOVSSZrmk, 1 , 32 , rebuildZeroUpperCst},
440+ {X86::VBROADCASTSSZ128rmk, 1 , 32 , rebuildSplatCst}},
441+ 128 , 3 );
442+ case X86::VMOVAPSZ128rmkz:
443+ case X86::VMOVUPSZ128rmkz:
444+ return FixupConstant ({{X86::VMOVSSZrmkz, 1 , 32 , rebuildZeroUpperCst},
445+ {X86::VBROADCASTSSZ128rmkz, 1 , 32 , rebuildSplatCst}},
446+ 128 , 2 );
427447 case X86::VMOVAPDZ256rm:
428448 case X86::VMOVAPSZ256rm:
429449 case X86::VMOVUPDZ256rm:
@@ -433,6 +453,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
433453 {X86::VBROADCASTSDZ256rm, 1 , 64 , rebuildSplatCst},
434454 {X86::VBROADCASTF32X4Z256rm, 1 , 128 , rebuildSplatCst}},
435455 256 , 1 );
456+ case X86::VMOVAPDZ256rmk:
457+ case X86::VMOVUPDZ256rmk:
458+ return FixupConstant ({{X86::VBROADCASTSDZ256rmk, 1 , 64 , rebuildSplatCst}},
459+ 256 , 3 );
460+ case X86::VMOVAPDZ256rmkz:
461+ case X86::VMOVUPDZ256rmkz:
462+ return FixupConstant ({{X86::VBROADCASTSDZ256rmkz, 1 , 64 , rebuildSplatCst}},
463+ 256 , 2 );
464+ case X86::VMOVAPSZ256rmk:
465+ case X86::VMOVUPSZ256rmk:
466+ return FixupConstant (
467+ {{X86::VBROADCASTSSZ256rmk, 1 , 32 , rebuildSplatCst},
468+ {X86::VBROADCASTF32X4Z256rmk, 1 , 128 , rebuildSplatCst}},
469+ 256 , 3 );
470+ case X86::VMOVAPSZ256rmkz:
471+ case X86::VMOVUPSZ256rmkz:
472+ return FixupConstant (
473+ {{X86::VBROADCASTSSZ256rmkz, 1 , 32 , rebuildSplatCst},
474+ {X86::VBROADCASTF32X4Z256rmkz, 1 , 128 , rebuildSplatCst}},
475+ 256 , 2 );
436476 case X86::VMOVAPDZrm:
437477 case X86::VMOVAPSZrm:
438478 case X86::VMOVUPDZrm:
@@ -442,6 +482,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
442482 {X86::VBROADCASTF32X4rm, 1 , 128 , rebuildSplatCst},
443483 {X86::VBROADCASTF64X4rm, 1 , 256 , rebuildSplatCst}},
444484 512 , 1 );
485+ case X86::VMOVAPDZrmk:
486+ case X86::VMOVUPDZrmk:
487+ return FixupConstant ({{X86::VBROADCASTSDZrmk, 1 , 64 , rebuildSplatCst},
488+ {X86::VBROADCASTF64X4rmk, 1 , 256 , rebuildSplatCst}},
489+ 512 , 3 );
490+ case X86::VMOVAPDZrmkz:
491+ case X86::VMOVUPDZrmkz:
492+ return FixupConstant ({{X86::VBROADCASTSDZrmkz, 1 , 64 , rebuildSplatCst},
493+ {X86::VBROADCASTF64X4rmkz, 1 , 256 , rebuildSplatCst}},
494+ 512 , 2 );
495+ case X86::VMOVAPSZrmk:
496+ case X86::VMOVUPSZrmk:
497+ return FixupConstant ({{X86::VBROADCASTSSZrmk, 1 , 32 , rebuildSplatCst},
498+ {X86::VBROADCASTF32X4rmk, 1 , 128 , rebuildSplatCst}},
499+ 512 , 3 );
500+ case X86::VMOVAPSZrmkz:
501+ case X86::VMOVUPSZrmkz:
502+ return FixupConstant ({{X86::VBROADCASTSSZrmkz, 1 , 32 , rebuildSplatCst},
503+ {X86::VBROADCASTF32X4rmkz, 1 , 128 , rebuildSplatCst}},
504+ 512 , 2 );
445505 /* Integer Loads */
446506 case X86::MOVDQArm:
447507 case X86::MOVDQUrm: {
@@ -537,6 +597,42 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
537597 {X86::VPMOVZXDQZ128rm, 2 , 32 , rebuildZExtCst}};
538598 return FixupConstant (Fixups, 128 , 1 );
539599 }
600+ case X86::VMOVDQA32Z128rmk:
601+ case X86::VMOVDQU32Z128rmk:
602+ return FixupConstant ({{X86::VPBROADCASTDZ128rmk, 1 , 32 , rebuildSplatCst},
603+ {X86::VPMOVSXBDZ128rmk, 4 , 8 , rebuildSExtCst},
604+ {X86::VPMOVZXBDZ128rmk, 4 , 8 , rebuildZExtCst},
605+ {X86::VPMOVSXWDZ128rmk, 4 , 16 , rebuildSExtCst},
606+ {X86::VPMOVZXWDZ128rmk, 4 , 16 , rebuildZExtCst}},
607+ 128 , 3 );
608+ case X86::VMOVDQA32Z128rmkz:
609+ case X86::VMOVDQU32Z128rmkz:
610+ return FixupConstant ({{X86::VPBROADCASTDZ128rmkz, 1 , 32 , rebuildSplatCst},
611+ {X86::VPMOVSXBDZ128rmkz, 4 , 8 , rebuildSExtCst},
612+ {X86::VPMOVZXBDZ128rmkz, 4 , 8 , rebuildZExtCst},
613+ {X86::VPMOVSXWDZ128rmkz, 4 , 16 , rebuildSExtCst},
614+ {X86::VPMOVZXWDZ128rmkz, 4 , 16 , rebuildZExtCst}},
615+ 128 , 2 );
616+ case X86::VMOVDQA64Z128rmk:
617+ case X86::VMOVDQU64Z128rmk:
618+ return FixupConstant ({{X86::VPMOVSXBQZ128rmk, 2 , 8 , rebuildSExtCst},
619+ {X86::VPMOVZXBQZ128rmk, 2 , 8 , rebuildZExtCst},
620+ {X86::VPMOVSXWQZ128rmk, 2 , 16 , rebuildSExtCst},
621+ {X86::VPMOVZXWQZ128rmk, 2 , 16 , rebuildZExtCst},
622+ {X86::VPBROADCASTQZ128rmk, 1 , 64 , rebuildSplatCst},
623+ {X86::VPMOVSXDQZ128rmk, 2 , 32 , rebuildSExtCst},
624+ {X86::VPMOVZXDQZ128rmk, 2 , 32 , rebuildZExtCst}},
625+ 128 , 3 );
626+ case X86::VMOVDQA64Z128rmkz:
627+ case X86::VMOVDQU64Z128rmkz:
628+ return FixupConstant ({{X86::VPMOVSXBQZ128rmkz, 2 , 8 , rebuildSExtCst},
629+ {X86::VPMOVZXBQZ128rmkz, 2 , 8 , rebuildZExtCst},
630+ {X86::VPMOVSXWQZ128rmkz, 2 , 16 , rebuildSExtCst},
631+ {X86::VPMOVZXWQZ128rmkz, 2 , 16 , rebuildZExtCst},
632+ {X86::VPBROADCASTQZ128rmkz, 1 , 64 , rebuildSplatCst},
633+ {X86::VPMOVSXDQZ128rmkz, 2 , 32 , rebuildSExtCst},
634+ {X86::VPMOVZXDQZ128rmkz, 2 , 32 , rebuildZExtCst}},
635+ 128 , 2 );
540636 case X86::VMOVDQA32Z256rm:
541637 case X86::VMOVDQA64Z256rm:
542638 case X86::VMOVDQU32Z256rm:
@@ -561,6 +657,46 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
561657 {X86::VPMOVZXDQZ256rm, 4 , 32 , rebuildZExtCst}};
562658 return FixupConstant (Fixups, 256 , 1 );
563659 }
660+ case X86::VMOVDQA32Z256rmk:
661+ case X86::VMOVDQU32Z256rmk:
662+ return FixupConstant (
663+ {{X86::VPBROADCASTDZ256rmk, 1 , 32 , rebuildSplatCst},
664+ {X86::VPMOVSXBDZ256rmk, 8 , 8 , rebuildSExtCst},
665+ {X86::VPMOVZXBDZ256rmk, 8 , 8 , rebuildZExtCst},
666+ {X86::VBROADCASTI32X4Z256rmk, 1 , 128 , rebuildSplatCst},
667+ {X86::VPMOVSXWDZ256rmk, 8 , 16 , rebuildSExtCst},
668+ {X86::VPMOVZXWDZ256rmk, 8 , 16 , rebuildZExtCst}},
669+ 256 , 3 );
670+ case X86::VMOVDQA32Z256rmkz:
671+ case X86::VMOVDQU32Z256rmkz:
672+ return FixupConstant (
673+ {{X86::VPBROADCASTDZ256rmkz, 1 , 32 , rebuildSplatCst},
674+ {X86::VPMOVSXBDZ256rmkz, 8 , 8 , rebuildSExtCst},
675+ {X86::VPMOVZXBDZ256rmkz, 8 , 8 , rebuildZExtCst},
676+ {X86::VBROADCASTI32X4Z256rmkz, 1 , 128 , rebuildSplatCst},
677+ {X86::VPMOVSXWDZ256rmkz, 8 , 16 , rebuildSExtCst},
678+ {X86::VPMOVZXWDZ256rmkz, 8 , 16 , rebuildZExtCst}},
679+ 256 , 2 );
680+ case X86::VMOVDQA64Z256rmk:
681+ case X86::VMOVDQU64Z256rmk:
682+ return FixupConstant ({{X86::VPMOVSXBQZ256rmk, 4 , 8 , rebuildSExtCst},
683+ {X86::VPMOVZXBQZ256rmk, 4 , 8 , rebuildZExtCst},
684+ {X86::VPBROADCASTQZ256rmk, 1 , 64 , rebuildSplatCst},
685+ {X86::VPMOVSXWQZ256rmk, 4 , 16 , rebuildSExtCst},
686+ {X86::VPMOVZXWQZ256rmk, 4 , 16 , rebuildZExtCst},
687+ {X86::VPMOVSXDQZ256rmk, 4 , 32 , rebuildSExtCst},
688+ {X86::VPMOVZXDQZ256rmk, 4 , 32 , rebuildZExtCst}},
689+ 256 , 3 );
690+ case X86::VMOVDQA64Z256rmkz:
691+ case X86::VMOVDQU64Z256rmkz:
692+ return FixupConstant ({{X86::VPMOVSXBQZ256rmkz, 4 , 8 , rebuildSExtCst},
693+ {X86::VPMOVZXBQZ256rmkz, 4 , 8 , rebuildZExtCst},
694+ {X86::VPBROADCASTQZ256rmkz, 1 , 64 , rebuildSplatCst},
695+ {X86::VPMOVSXWQZ256rmkz, 4 , 16 , rebuildSExtCst},
696+ {X86::VPMOVZXWQZ256rmkz, 4 , 16 , rebuildZExtCst},
697+ {X86::VPMOVSXDQZ256rmkz, 4 , 32 , rebuildSExtCst},
698+ {X86::VPMOVZXDQZ256rmkz, 4 , 32 , rebuildZExtCst}},
699+ 256 , 2 );
564700 case X86::VMOVDQA32Zrm:
565701 case X86::VMOVDQA64Zrm:
566702 case X86::VMOVDQU32Zrm:
@@ -586,43 +722,93 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
586722 {X86::VPMOVZXDQZrm, 8 , 32 , rebuildZExtCst}};
587723 return FixupConstant (Fixups, 512 , 1 );
588724 }
725+ case X86::VMOVDQA32Zrmk:
726+ case X86::VMOVDQU32Zrmk:
727+ return FixupConstant ({{X86::VPBROADCASTDZrmk, 1 , 32 , rebuildSplatCst},
728+ {X86::VBROADCASTI32X4rmk, 1 , 128 , rebuildSplatCst},
729+ {X86::VPMOVSXBDZrmk, 16 , 8 , rebuildSExtCst},
730+ {X86::VPMOVZXBDZrmk, 16 , 8 , rebuildZExtCst},
731+ {X86::VPMOVSXWDZrmk, 16 , 16 , rebuildSExtCst},
732+ {X86::VPMOVZXWDZrmk, 16 , 16 , rebuildZExtCst}},
733+ 512 , 3 );
734+ case X86::VMOVDQA32Zrmkz:
735+ case X86::VMOVDQU32Zrmkz:
736+ return FixupConstant ({{X86::VPBROADCASTDZrmkz, 1 , 32 , rebuildSplatCst},
737+ {X86::VBROADCASTI32X4rmkz, 1 , 128 , rebuildSplatCst},
738+ {X86::VPMOVSXBDZrmkz, 16 , 8 , rebuildSExtCst},
739+ {X86::VPMOVZXBDZrmkz, 16 , 8 , rebuildZExtCst},
740+ {X86::VPMOVSXWDZrmkz, 16 , 16 , rebuildSExtCst},
741+ {X86::VPMOVZXWDZrmkz, 16 , 16 , rebuildZExtCst}},
742+ 512 , 2 );
743+ case X86::VMOVDQA64Zrmk:
744+ case X86::VMOVDQU64Zrmk:
745+ return FixupConstant ({{X86::VPBROADCASTQZrmk, 1 , 64 , rebuildSplatCst},
746+ {X86::VPMOVSXBQZrmk, 8 , 8 , rebuildSExtCst},
747+ {X86::VPMOVZXBQZrmk, 8 , 8 , rebuildZExtCst},
748+ {X86::VPMOVSXWQZrmk, 8 , 16 , rebuildSExtCst},
749+ {X86::VPMOVZXWQZrmk, 8 , 16 , rebuildZExtCst},
750+ {X86::VBROADCASTI64X4rmk, 1 , 256 , rebuildSplatCst},
751+ {X86::VPMOVSXDQZrmk, 8 , 32 , rebuildSExtCst},
752+ {X86::VPMOVZXDQZrmk, 8 , 32 , rebuildZExtCst}},
753+ 512 , 3 );
754+ case X86::VMOVDQA64Zrmkz:
755+ case X86::VMOVDQU64Zrmkz:
756+ return FixupConstant ({{X86::VPBROADCASTQZrmkz, 1 , 64 , rebuildSplatCst},
757+ {X86::VPMOVSXBQZrmkz, 8 , 8 , rebuildSExtCst},
758+ {X86::VPMOVZXBQZrmkz, 8 , 8 , rebuildZExtCst},
759+ {X86::VPMOVSXWQZrmkz, 8 , 16 , rebuildSExtCst},
760+ {X86::VPMOVZXWQZrmkz, 8 , 16 , rebuildZExtCst},
761+ {X86::VBROADCASTI64X4rmkz, 1 , 256 , rebuildSplatCst},
762+ {X86::VPMOVSXDQZrmkz, 8 , 32 , rebuildSExtCst},
763+ {X86::VPMOVZXDQZrmkz, 8 , 32 , rebuildZExtCst}},
764+ 512 , 2 );
589765 }
590766
591- auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc32, unsigned OpSrc64) {
592- unsigned OpBcst32 = 0 , OpBcst64 = 0 ;
593- unsigned OpNoBcst32 = 0 , OpNoBcst64 = 0 ;
767+ auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc16, unsigned OpSrc32,
768+ unsigned OpSrc64) { // Tries the 16-, then 32-, then 64-bit broadcast fold; a zero opcode skips that width.
769+ if (OpSrc16) {
770+ if (const X86FoldTableEntry *Mem2Bcst =
771+ llvm::lookupBroadcastFoldTableBySize (OpSrc16, 16 )) {
772+ unsigned OpBcst16 = Mem2Bcst->DstOp ; // Opcode taken from the fold-table entry's DstOp.
773+ unsigned OpNoBcst16 = Mem2Bcst->Flags & TB_INDEX_MASK; // Masked entry flags, passed as FixupConstant's last argument.
774+ FixupEntry Fixups[] = {{(int )OpBcst16, 1 , 16 , rebuildSplatCst}};
775+ // TODO: Add support for RegBitWidth, but currently rebuildSplatCst
776+ // doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
777+ if (FixupConstant (Fixups, 0 , OpNoBcst16)) // Stop at the first width that succeeds; otherwise try the next width.
778+ return true ;
779+ }
780+ }
594781 if (OpSrc32) {
595782 if (const X86FoldTableEntry *Mem2Bcst =
596783 llvm::lookupBroadcastFoldTableBySize (OpSrc32, 32 )) {
597- OpBcst32 = Mem2Bcst->DstOp ;
598- OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
784+ unsigned OpBcst32 = Mem2Bcst->DstOp ;
785+ unsigned OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
786+ FixupEntry Fixups[] = {{(int )OpBcst32, 1 , 32 , rebuildSplatCst}};
787+ // TODO: Add support for RegBitWidth, but currently rebuildSplatCst
788+ // doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
789+ if (FixupConstant (Fixups, 0 , OpNoBcst32)) // Same early-exit pattern as the 16-bit attempt.
790+ return true ;
599791 }
600792 }
601793 if (OpSrc64) {
602794 if (const X86FoldTableEntry *Mem2Bcst =
603795 llvm::lookupBroadcastFoldTableBySize (OpSrc64, 64 )) {
604- OpBcst64 = Mem2Bcst->DstOp ;
605- OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
796+ unsigned OpBcst64 = Mem2Bcst->DstOp ;
797+ unsigned OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
798+ FixupEntry Fixups[] = {{(int )OpBcst64, 1 , 64 , rebuildSplatCst}};
799+ // TODO: Add support for RegBitWidth, but currently rebuildSplatCst
800+ // doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
801+ if (FixupConstant (Fixups, 0 , OpNoBcst64)) // Last width attempted.
802+ return true ;
606803 }
607804 }
608- assert (((OpBcst32 == 0 ) || (OpBcst64 == 0 ) || (OpNoBcst32 == OpNoBcst64)) &&
609- " OperandNo mismatch" );
610-
611- if (OpBcst32 || OpBcst64) {
612- unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
613- FixupEntry Fixups[] = {{(int )OpBcst32, 32 , 32 , rebuildSplatCst},
614- {(int )OpBcst64, 64 , 64 , rebuildSplatCst}};
615- // TODO: Add support for RegBitWidth, but currently rebuildSplatCst
616- // doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
617- return FixupConstant (Fixups, 0 , OpNo);
618- }
619805 return false ; // No attempted width produced a successful fixup.
620806 };
621807
622808 // Attempt to find an AVX512 mapping from a full width memory-fold instruction
623809 // to a broadcast-fold instruction variant.
624810 if ((MI.getDesc ().TSFlags & X86II::EncodingMask) == X86II::EVEX)
625- return ConvertToBroadcastAVX512 (Opc, Opc);
811+ return ConvertToBroadcastAVX512 (Opc, Opc, Opc );
626812
627813 // Reverse the X86InstrInfo::setExecutionDomainCustom EVEX->VEX logic
628814 // conversion to see if we can convert to a broadcasted (integer) logic op.
@@ -679,7 +865,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
679865 break ;
680866 }
681867 if (OpSrc32 || OpSrc64)
682- return ConvertToBroadcastAVX512 (OpSrc32, OpSrc64);
868+ return ConvertToBroadcastAVX512 (0 , OpSrc32, OpSrc64);
683869 }
684870
685871 return false ;
0 commit comments