@@ -9679,13 +9679,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
96799679 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
96809680 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
96819681
9682- // Recognize build vector patterns to emit VSX vector instructions
9683- // instead of loading value from memory.
9684- if (Subtarget.isISA3_1() && Subtarget.hasVSX()) {
9685- if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9686- return VecPat;
9687- }
9688-
96899682 if (Subtarget.hasP10Vector()) {
96909683 APInt BitMask(32, 0);
96919684 // If the value of the vector is all zeros or all ones,
@@ -9705,6 +9698,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
97059698 }
97069699 return SDV;
97079700 }
9701+ // Recognize build vector patterns to emit VSX vector instructions
9702+ // instead of loading value from memory.
9703+ if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9704+ return VecPat;
97089705 }
97099706 // Check if this is a splat of a constant value.
97109707 APInt APSplatBits, APSplatUndef;
@@ -15679,31 +15676,35 @@ using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
1567915676
1568015677static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
1568115678
15682- static const auto BaseLXVKQPatterns = []() {
15683- // LXVKQ instruction loads the Quadword value:
15684- // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
15685- return std::array<std::pair<APInt, uint32_t>, 1>{
15686- {{APInt(128, 0x8000000000000000ULL) << 64, 16}}};
15687- }();
15679+ // LXVKQ instruction loads the Quadword value:
15680+ // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
15681+ static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
15682+ static const uint32_t Uim = 16;
1568815683
1568915684 // Check for direct LXVKQ match (no shift needed)
15690- for (const auto &[BasePattern, Uim] : BaseLXVKQPatterns) {
15691- if (FullVal == BasePattern)
15692- return std::make_tuple(Uim, uint8_t{0});
15693- }
15685+ if (FullVal == BasePattern)
15686+ return std::make_tuple(Uim, uint8_t{0});
1569415687
15695- // Check if FullValue can be generated by (right) shifting a base pattern
15696- for (const auto &[BasePattern, Uim] : BaseLXVKQPatterns) {
15697- if (BasePattern.lshr(127) == FullVal)
15698- return std::make_tuple(Uim, uint8_t{127});
15699- }
15688+ // Check if FullVal is 1 (the result of the base pattern >> 127)
15689+ if (FullVal == APInt(128, 1))
15690+ return std::make_tuple(Uim, uint8_t{127});
1570015691
1570115692 return std::nullopt;
1570215693}
1570315694
15704- /// Combine vector loads to a single load by recognising patterns in the Build
15705- /// Vector. LXVKQ instruction load VSX vector with a special quadword value
15706- /// based on an immediate value.
15695+ /// Combine vector loads to a single load (using lxvkq) or splat with shift of a
15696+ /// constant (xxspltib + vsrq) by recognising patterns in the Build Vector.
15697+ /// The LXVKQ instruction loads a VSX vector with a special quadword value
15698+ /// based on an immediate value. If UIM=0b10000, LXVKQ loads VSR[32×TX+T]
15699+ /// with the value 0x8000_0000_0000_0000_0000_0000_0000_0000.
15700+ /// This can be used to inline the build vector constants that have the
15701+ /// following patterns:
15702+ ///
15703+ /// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
15704+ /// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
15705+ /// The MSB pattern can be loaded directly using LXVKQ, while the LSB pattern
15706+ /// is loaded using a combination of splat and right-shift instructions.
15707+
1570715708SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
1570815709 SelectionDAG &DAG) const {
1570915710
@@ -15775,8 +15776,8 @@ SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
1577515776 }
1577615777
1577715778 // The right shifted pattern can be constructed using a combination of
15778- // XXSPLITIB and VSRQ instruction. VSRQ uses the shift amount from the lower
15779- // 7 bits of byte 15. This can be specified using XXSPLITIB with immediate
15779+ // XXSPLTIB and VSRQ instructions. VSRQ uses the shift amount from the lower
15780+ // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
1578015781 // value 255.
1578115782 SDValue ShiftAmountVec =
1578215783 SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
0 commit comments