diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 29526cf5a5273..a1134663c0e7a 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -10,9 +10,19 @@
 // instructions are inserted.
 //
 // The purpose of this optimization is to make the VL argument, for instructions
-// that have a VL argument, as small as possible. This is implemented by
-// visiting each instruction in reverse order and checking that if it has a VL
-// argument, whether the VL can be reduced.
+// that have a VL argument, as small as possible.
+//
+// This is split into a sparse dataflow analysis where we determine what VL is
+// demanded by each instruction first, and then afterwards try to reduce the VL
+// of each instruction if it demands less than its VL operand.
+//
+// The analysis is explained in more detail in the 2025 EuroLLVM Developers'
+// Meeting talk "Accidental Dataflow Analysis: Extending the RISC-V VL
+// Optimizer", which is available on YouTube at
+// https://www.youtube.com/watch?v=Mfb5fRSdJAc
+//
+// The slides for the talk are available at
+// https://llvm.org/devmtg/2025-04/slides/technical_talk/lau_accidental_dataflow.pdf
 //
 //===---------------------------------------------------------------------===//
 
@@ -30,6 +40,27 @@ using namespace llvm;
 
 namespace {
 
+/// Wrapper around MachineOperand that defaults to immediate 0.
+struct DemandedVL {
+  MachineOperand VL;
+  DemandedVL() : VL(MachineOperand::CreateImm(0)) {}
+  DemandedVL(MachineOperand VL) : VL(VL) {}
+  static DemandedVL vlmax() {
+    return DemandedVL(MachineOperand::CreateImm(RISCV::VLMaxSentinel));
+  }
+  bool operator!=(const DemandedVL &Other) const {
+    return !VL.isIdenticalTo(Other.VL);
+  }
+
+  DemandedVL max(const DemandedVL &X) const {
+    if (RISCV::isVLKnownLE(VL, X.VL))
+      return X;
+    if (RISCV::isVLKnownLE(X.VL, VL))
+      return *this;
+    return DemandedVL::vlmax();
+  }
+};
+
 class RISCVVLOptimizer : public MachineFunctionPass {
   const MachineRegisterInfo *MRI;
   const MachineDominatorTree *MDT;
@@ -51,17 +82,25 @@ class RISCVVLOptimizer : public MachineFunctionPass {
   StringRef getPassName() const override { return PASS_NAME; }
 
 private:
-  std::optional<MachineOperand>
-  getMinimumVLForUser(const MachineOperand &UserOp) const;
-  /// Returns the largest common VL MachineOperand that may be used to optimize
-  /// MI. Returns std::nullopt if it failed to find a suitable VL.
-  std::optional<MachineOperand> checkUsers(const MachineInstr &MI) const;
+  DemandedVL getMinimumVLForUser(const MachineOperand &UserOp) const;
+  /// Returns true if the users of \p MI have compatible EEWs and SEWs.
+  bool checkUsers(const MachineInstr &MI) const;
   bool tryReduceVL(MachineInstr &MI) const;
   bool isCandidate(const MachineInstr &MI) const;
+  void transfer(const MachineInstr &MI);
 
   /// For a given instruction, records what elements of it are demanded by
   /// downstream users.
-  DenseMap<const MachineInstr *, std::optional<MachineOperand>> DemandedVLs;
+  DenseMap<const MachineInstr *, DemandedVL> DemandedVLs;
+  SetVector<const MachineInstr *> Worklist;
+
+  /// \returns all vector virtual registers that \p MI uses.
+  auto virtual_vec_uses(const MachineInstr &MI) const {
+    return make_filter_range(MI.uses(), [this](const MachineOperand &MO) {
+      return MO.isReg() && MO.getReg().isVirtual() &&
+             RISCVRegisterInfo::isRVVRegClass(MRI->getRegClass(MO.getReg()));
+    });
+  }
 };
 
 /// Represents the EMUL and EEW of a MachineOperand.
@@ -847,10 +886,15 @@ static std::optional<OperandInfo> getOperandInfo(const MachineOperand &MO) {
   return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(*Log2EEW, MI), *Log2EEW);
 }
 
+static bool isTupleInsertInstr(const MachineInstr &MI);
+
 /// Return true if this optimization should consider MI for VL reduction. This
 /// white-list approach simplifies this optimization for instructions that may
 /// have more complex semantics with relation to how it uses VL.
 static bool isSupportedInstr(const MachineInstr &MI) {
+  if (MI.isPHI() || MI.isFullCopy() || isTupleInsertInstr(MI))
+    return true;
+
   const RISCVVPseudosTable::PseudoInfo *RVV =
       RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
 
@@ -1348,21 +1392,24 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
   return true;
 }
 
-std::optional<MachineOperand>
+DemandedVL
 RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
   const MachineInstr &UserMI = *UserOp.getParent();
   const MCInstrDesc &Desc = UserMI.getDesc();
 
+  if (UserMI.isPHI() || UserMI.isFullCopy() || isTupleInsertInstr(UserMI))
+    return DemandedVLs.lookup(&UserMI);
+
   if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
     LLVM_DEBUG(dbgs() << " Abort due to lack of VL, assume that"
                          " use VLMAX\n");
-    return std::nullopt;
+    return DemandedVL::vlmax();
   }
 
   if (RISCVII::readsPastVL(
           TII->get(RISCV::getRVVMCOpcode(UserMI.getOpcode())).TSFlags)) {
     LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n");
-    return std::nullopt;
+    return DemandedVL::vlmax();
   }
 
   unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
@@ -1376,11 +1423,10 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
   if (UserOp.isTied()) {
     assert(UserOp.getOperandNo() == UserMI.getNumExplicitDefs() &&
            RISCVII::isFirstDefTiedToFirstUse(UserMI.getDesc()));
-    auto DemandedVL = DemandedVLs.lookup(&UserMI);
-    if (!DemandedVL || !RISCV::isVLKnownLE(*DemandedVL, VLOp)) {
+    if (!RISCV::isVLKnownLE(DemandedVLs.lookup(&UserMI).VL, VLOp)) {
       LLVM_DEBUG(dbgs() << " Abort because user is passthru in "
                            "instruction with demanded tail\n");
-      return std::nullopt;
+      return DemandedVL::vlmax();
     }
   }
 
@@ -1393,11 +1439,8 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
 
   // If we know the demanded VL of UserMI, then we can reduce the VL it
   // requires.
-  if (auto DemandedVL = DemandedVLs.lookup(&UserMI)) {
-    assert(isCandidate(UserMI));
-    if (RISCV::isVLKnownLE(*DemandedVL, VLOp))
-      return DemandedVL;
-  }
+  if (RISCV::isVLKnownLE(DemandedVLs.lookup(&UserMI).VL, VLOp))
+    return DemandedVLs.lookup(&UserMI);
 
   return VLOp;
 }
@@ -1450,22 +1493,23 @@ static bool isSegmentedStoreInstr(const MachineInstr &MI) {
   }
 }
 
-std::optional<MachineOperand>
-RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
-  std::optional<MachineOperand> CommonVL;
-  SmallSetVector<MachineOperand *, 8> Worklist;
+bool RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
+  if (MI.isPHI() || MI.isFullCopy() || isTupleInsertInstr(MI))
+    return true;
+
+  SmallSetVector<MachineOperand *, 8> OpWorklist;
   SmallPtrSet<const MachineInstr *, 4> PHISeen;
   for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg()))
-    Worklist.insert(&UserOp);
+    OpWorklist.insert(&UserOp);
 
-  while (!Worklist.empty()) {
-    MachineOperand &UserOp = *Worklist.pop_back_val();
+  while (!OpWorklist.empty()) {
+    MachineOperand &UserOp = *OpWorklist.pop_back_val();
     const MachineInstr &UserMI = *UserOp.getParent();
     LLVM_DEBUG(dbgs() << " Checking user: " << UserMI << "\n");
 
     if (UserMI.isFullCopy() && UserMI.getOperand(0).getReg().isVirtual()) {
       LLVM_DEBUG(dbgs() << " Peeking through uses of COPY\n");
-      Worklist.insert_range(llvm::make_pointer_range(
+      OpWorklist.insert_range(llvm::make_pointer_range(
           MRI->use_operands(UserMI.getOperand(0).getReg())));
       continue;
     }
@@ -1481,8 +1525,8 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
         // whole register group).
         if (!isTupleInsertInstr(CandidateMI) &&
             !isSegmentedStoreInstr(CandidateMI))
-          return std::nullopt;
-        Worklist.insert(&UseOp);
+          return false;
+        OpWorklist.insert(&UseOp);
       }
       continue;
     }
@@ -1492,28 +1536,14 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
       if (!PHISeen.insert(&UserMI).second)
         continue;
       LLVM_DEBUG(dbgs() << " Peeking through uses of PHI\n");
-      Worklist.insert_range(llvm::make_pointer_range(
+      OpWorklist.insert_range(llvm::make_pointer_range(
          MRI->use_operands(UserMI.getOperand(0).getReg())));
       continue;
     }
 
-    auto VLOp = getMinimumVLForUser(UserOp);
-    if (!VLOp)
-      return std::nullopt;
-
-    // Use the largest VL among all the users. If we cannot determine this
-    // statically, then we cannot optimize the VL.
-    if (!CommonVL || RISCV::isVLKnownLE(*CommonVL, *VLOp)) {
-      CommonVL = *VLOp;
-      LLVM_DEBUG(dbgs() << " User VL is: " << VLOp << "\n");
-    } else if (!RISCV::isVLKnownLE(*VLOp, *CommonVL)) {
-      LLVM_DEBUG(dbgs() << " Abort because cannot determine a common VL\n");
-      return std::nullopt;
-    }
-
     if (!RISCVII::hasSEWOp(UserMI.getDesc().TSFlags)) {
       LLVM_DEBUG(dbgs() << " Abort due to lack of SEW operand\n");
-      return std::nullopt;
+      return false;
    }
 
     std::optional<OperandInfo> ConsumerInfo = getOperandInfo(UserOp);
@@ -1522,7 +1552,7 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
       LLVM_DEBUG(dbgs() << " Abort due to unknown operand information.\n");
       LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n");
       LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n");
-      return std::nullopt;
+      return false;
     }
 
     if (!OperandInfo::areCompatible(*ProducerInfo, *ConsumerInfo)) {
@@ -1531,11 +1561,11 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
           << " Abort due to incompatible information for EMUL or EEW.\n");
       LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n");
       LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n");
-      return std::nullopt;
+      return false;
     }
   }
 
-  return CommonVL;
+  return true;
 }
 
 bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
@@ -1551,9 +1581,7 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
     return false;
   }
 
-  auto CommonVL = DemandedVLs.lookup(&MI);
-  if (!CommonVL)
-    return false;
+  auto *CommonVL = &DemandedVLs.at(&MI).VL;
 
   assert((CommonVL->isImm() || CommonVL->getReg().isVirtual()) &&
          "Expected VL to be an Imm or virtual Reg");
@@ -1564,7 +1592,7 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
     const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg());
     if (RISCVInstrInfo::isFaultOnlyFirstLoad(*VLMI) &&
         !MDT->dominates(VLMI, &MI))
-      CommonVL = VLMI->getOperand(RISCVII::getVLOpNum(VLMI->getDesc()));
+      CommonVL = &VLMI->getOperand(RISCVII::getVLOpNum(VLMI->getDesc()));
   }
 
   if (!RISCV::isVLKnownLE(*CommonVL, VLOp)) {
@@ -1599,6 +1627,24 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
   return true;
 }
 
+static bool isPhysical(const MachineOperand &MO) {
+  return MO.isReg() && MO.getReg().isPhysical();
+}
+
+/// Look through \p MI's operands and propagate what it demands to its uses.
+void RISCVVLOptimizer::transfer(const MachineInstr &MI) {
+  if (!isSupportedInstr(MI) || !checkUsers(MI) || any_of(MI.defs(), isPhysical))
+    DemandedVLs[&MI] = DemandedVL::vlmax();
+
+  for (const MachineOperand &MO : virtual_vec_uses(MI)) {
+    const MachineInstr *Def = MRI->getVRegDef(MO.getReg());
+    DemandedVL Prev = DemandedVLs[Def];
+    DemandedVLs[Def] = DemandedVLs[Def].max(getMinimumVLForUser(MO));
+    if (DemandedVLs[Def] != Prev)
+      Worklist.insert(Def);
+  }
+}
+
 bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -1614,15 +1660,18 @@ bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
 
   assert(DemandedVLs.empty());
 
-  // For each instruction that defines a vector, compute what VL its
-  // downstream users demand.
+  // For each instruction that defines a vector, propagate the VL it
+  // uses to its inputs.
   for (MachineBasicBlock *MBB : post_order(&MF)) {
     assert(MDT->isReachableFromEntry(MBB));
-    for (MachineInstr &MI : reverse(*MBB)) {
-      if (!isCandidate(MI))
-        continue;
-      DemandedVLs.insert({&MI, checkUsers(MI)});
-    }
+    for (MachineInstr &MI : reverse(*MBB))
+      Worklist.insert(&MI);
+  }
+
+  while (!Worklist.empty()) {
+    const MachineInstr *MI = Worklist.front();
+    Worklist.remove(MI);
+    transfer(*MI);
   }
 
   // Then go through and see if we can reduce the VL of any instructions to
diff --git a/llvm/test/CodeGen/RISCV/rvv/reproducer-pr146855.ll b/llvm/test/CodeGen/RISCV/rvv/reproducer-pr146855.ll
index cca00bf58063d..2d64defe8c7b1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/reproducer-pr146855.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/reproducer-pr146855.ll
@@ -6,7 +6,7 @@ target triple = "riscv64-unknown-linux-gnu"
 define i32 @_ZN4Mesh12rezone_countESt6vectorIiSaIiEERiS3_( %wide.load, %0, %1, %2, %3) #0 {
 ; CHECK-LABEL: _ZN4Mesh12rezone_countESt6vectorIiSaIiEERiS3_:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 0, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv1r.v v8, v0
 ; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:    vmv.v.i v10, 0
@@ -14,7 +14,7 @@ define i32 @_ZN4Mesh12rezone_countESt6vectorIiSaIiEERiS3_( %wi
 ; CHECK-NEXT:    vmv.v.i v14, 0
 ; CHECK-NEXT:  .LBB0_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vsetivli zero, 0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    slli a0, a0, 2
 ; CHECK-NEXT:    vmv2r.v v16, v10
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
index 20608cd6bed87..3844b984455c4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
@@ -238,3 +238,90 @@ define void @segmented_store_insert_subreg( %v0,
 , 3) %t2, ptr %p, iXLen %vl, iXLen 5)
   ret void
 }
+
+define void @recurrence(<vscale x 4 x i32> %v, ptr %p, iXLen %n, iXLen %vl) {
+; CHECK-LABEL: recurrence:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v10, 0
+; CHECK-NEXT:  .LBB16_1: # %loop
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    addi a1, a1, -1
+; CHECK-NEXT:    vadd.vv v10, v10, v8
+; CHECK-NEXT:    bnez a1, .LBB16_1
+; CHECK-NEXT:  # %bb.2: # %exit
+; CHECK-NEXT:    vse32.v v10, (a0)
+; CHECK-NEXT:    ret
+entry:
+  br label %loop
+loop:
+  %iv = phi iXLen [ 0, %entry ], [ %iv.next, %loop ]
+  %phi = phi <vscale x 4 x i32> [ zeroinitializer, %entry ], [ %x, %loop ]
+  %x = add <vscale x 4 x i32> %phi, %v
+  %iv.next = add iXLen %iv, 1
+  %done = icmp eq iXLen %iv.next, %n
+  br i1 %done, label %exit, label %loop
+exit:
+  call void @llvm.riscv.vse(<vscale x 4 x i32> %x, ptr %p, iXLen %vl)
+  ret void
+}
+
+define void @recurrence_vleff(<vscale x 4 x i32> %v, ptr %p, iXLen %n, iXLen %vl) {
+; CHECK-LABEL: recurrence_vleff:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    mv a3, a0
+; CHECK-NEXT:  .LBB17_1: # %loop
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-NEXT:    vle32ff.v v10, (a3)
+; CHECK-NEXT:    addi a1, a1, -1
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    addi a3, a3, 4
+; CHECK-NEXT:    bnez a1, .LBB17_1
+; CHECK-NEXT:  # %bb.2: # %exit
+; CHECK-NEXT:    ret
+entry:
+  br label %loop
+loop:
+  %iv = phi iXLen [ 0, %entry ], [ %iv.next, %loop ]
+  %phi = phi <vscale x 4 x i32> [ zeroinitializer, %entry ], [ %y, %loop ]
+  %gep = getelementptr i32, ptr %p, iXLen %iv
+  %vleff = call { <vscale x 4 x i32>, iXLen } @llvm.riscv.vleff(<vscale x 4 x i32> poison, ptr %gep, iXLen %vl)
+  %vleff.x = extractvalue { <vscale x 4 x i32>, iXLen } %vleff, 0
+  %vleff.vl = extractvalue { <vscale x 4 x i32>, iXLen } %vleff, 1
+  %y = add <vscale x 4 x i32> %phi, %vleff.x
+  call void @llvm.riscv.vse(<vscale x 4 x i32> %y, ptr %p, iXLen %vleff.vl)
+  %iv.next = add iXLen %iv, 1
+  %done = icmp eq iXLen %iv.next, %n
+  br i1 %done, label %exit, label %loop
+exit:
+  ret void
+}
+
+define <vscale x 4 x i32> @join(<vscale x 4 x i32> %v, i1 %cond, iXLen %vl) {
+; CHECK-LABEL: join:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
+; CHECK-NEXT:    vadd.vi v8, v8, 1
+; CHECK-NEXT:    beqz a0, .LBB18_2
+; CHECK-NEXT:  # %bb.1: # %foo
+; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT:    vadd.vi v8, v8, 1
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB18_2: # %bar
+; CHECK-NEXT:    vadd.vi v8, v8, 2
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, iXLen 1, iXLen -1)
+  br i1 %cond, label %foo, label %bar
+foo:
+  %b = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 1, iXLen 1)
+  ret <vscale x 4 x i32> %b
+bar:
+  %c = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 2, iXLen 2)
+  ret <vscale x 4 x i32> %c
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index 086b3203ed5b0..9174b98de0aa9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -699,3 +699,74 @@ body: |
     %11:vr = PseudoVADD_VV_M1 $noreg, %2, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */
     $v10 = COPY %11
     PseudoRET implicit $v10
+...
+---
+name: recurrence
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: recurrence
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   %avl:gprnox0 = COPY $x8
+  ; CHECK-NEXT:   %start:vr = PseudoVMV_V_I_M1 $noreg, 0, %avl, 3 /* e8 */, 3 /* ta, ma */
+  ; CHECK-NEXT:   PseudoBR %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   %phi:vr = PHI %start, %bb.0, %inc, %bb.1
+  ; CHECK-NEXT:   %inc:vr = PseudoVADD_VI_M1 $noreg, %phi, 1, %avl, 3 /* e8 */, 3 /* ta, ma */
+  ; CHECK-NEXT:   BNE $noreg, $noreg, %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   PseudoVSE8_V_M1 %inc, $noreg, %avl, 3 /* e8 */
+  bb.0:
+    liveins: $x8
+    %avl:gprnox0 = COPY $x8
+    %start:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 3 /* e8 */, 3 /* ta, ma */
+    PseudoBR %bb.1
+  bb.1:
+    %phi:vr = PHI %start, %bb.0, %inc, %bb.1
+    %inc:vr = PseudoVADD_VI_M1 $noreg, %phi, 1, -1, 3 /* e8 */, 3 /* ta, ma */
+    BNE $noreg, $noreg, %bb.1
+  bb.2:
+    PseudoVSE8_V_M1 %inc, $noreg, %avl, 3 /* e8 */
+...
+---
+name: recurrence_cant_reduce
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: recurrence_cant_reduce
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $x8, $x9
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   %avl1:gprnox0 = COPY $x8
+  ; CHECK-NEXT:   %avl2:gprnox0 = COPY $x8
+  ; CHECK-NEXT:   %start:vr = PseudoVMV_V_I_M1 $noreg, 0, %avl1, 3 /* e8 */, 3 /* ta, ma */
+  ; CHECK-NEXT:   PseudoBR %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   %phi:vr = PHI %start, %bb.0, %inc, %bb.1
+  ; CHECK-NEXT:   %inc:vr = PseudoVADD_VI_M1 $noreg, %phi, 1, %avl1, 3 /* e8 */, 3 /* ta, ma */
+  ; CHECK-NEXT:   BNE $noreg, $noreg, %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   PseudoVSE8_V_M1 %inc, $noreg, %avl2, 3 /* e8 */
+  bb.0:
+    liveins: $x8, $x9
+    %avl1:gprnox0 = COPY $x8
+    %avl2:gprnox0 = COPY $x8
+    %start:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 3 /* e8 */, 3 /* ta, ma */
+    PseudoBR %bb.1
+  bb.1:
+    %phi:vr = PHI %start, %bb.0, %inc, %bb.1
+    %inc:vr = PseudoVADD_VI_M1 $noreg, %phi, 1, %avl1, 3 /* e8 */, 3 /* ta, ma */
+    BNE $noreg, $noreg, %bb.1
+  bb.2:
+    PseudoVSE8_V_M1 %inc, $noreg, %avl2, 3 /* e8 */
+...
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll b/llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll
index 4b9f9a0579c48..3a05477e64ccd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll
@@ -11,7 +11,7 @@
 ; which was responsible for speeding it up.
 
 define <vscale x 4 x i32> @same_vl_imm(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
-  ; CHECK: User VL is: 4
+  ; CHECK: Trying to reduce VL for %{{.+}}:vrm2 = PseudoVADD_VV_M2
   ; CHECK: Abort due to CommonVL == VLOp, no point in reducing.
   %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 4)
   %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, i64 4)
@@ -19,7 +19,7 @@ define <vscale x 4 x i32> @same_vl_imm(<vscale x 4 x i32> %passthru, <vscale x
 }
 
 define <vscale x 4 x i32> @same_vl_reg(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 %vl) {
-  ; CHECK: User VL is: %3:gprnox0
+  ; CHECK: Trying to reduce VL for %{{.+}}:vrm2 = PseudoVADD_VV_M2
   ; CHECK: Abort due to CommonVL == VLOp, no point in reducing.
   %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 %vl)
   %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, i64 %vl)