From a0b48677b97a58326854668eb6183aa9de6c1fbe Mon Sep 17 00:00:00 2001
From: "Mikhail R. Gadelha"
Date: Wed, 20 Nov 2024 18:22:55 -0300
Subject: [PATCH 1/3] Add late optimization pass for riscv

Signed-off-by: Mikhail R. Gadelha
---
 llvm/lib/Target/RISCV/CMakeLists.txt         |   1 +
 llvm/lib/Target/RISCV/RISCV.h                |   3 +
 llvm/lib/Target/RISCV/RISCVLatePeephole.cpp  | 190 +++++++++++++++++++
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp |   2 +
 4 files changed, 196 insertions(+)
 create mode 100644 llvm/lib/Target/RISCV/RISCVLatePeephole.cpp

diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index fd049d1a57860..d4eb016455b6d 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -34,6 +34,7 @@ add_llvm_target(RISCVCodeGen
   RISCVConstantPoolValue.cpp
   RISCVDeadRegisterDefinitions.cpp
   RISCVMakeCompressible.cpp
+  RISCVLatePeephole.cpp
   RISCVExpandAtomicPseudoInsts.cpp
   RISCVExpandPseudoInsts.cpp
   RISCVFrameLowering.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index d7bab601d545c..2e682eabd6e02 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -40,6 +40,9 @@ void initializeRISCVLandingPadSetupPass(PassRegistry &);
 FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM,
                                  CodeGenOptLevel OptLevel);
 
+FunctionPass *createRISCVLatePeepholeOptPass();
+void initializeRISCVLatePeepholeOptPass(PassRegistry &);
+
 FunctionPass *createRISCVMakeCompressibleOptPass();
 void initializeRISCVMakeCompressibleOptPass(PassRegistry &);
 
diff --git a/llvm/lib/Target/RISCV/RISCVLatePeephole.cpp b/llvm/lib/Target/RISCV/RISCVLatePeephole.cpp
new file mode 100644
index 0000000000000..11add97aa8413
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVLatePeephole.cpp
@@ -0,0 +1,190 @@
+//===-- RISCVLatePeephole.cpp - Late stage peephole optimization ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This pass folds basic blocks whose only instruction is a BEQ or BNE that
+/// compares a register with itself. The outcome of such a branch is known
+/// statically (BEQ is always taken, BNE never is), so the block is merged with
+/// the block that control always continues to.
+///
+/// NOTE(review): the pass is scheduled after branch relaxation and merging
+/// moves instructions, which can change branch displacements. Confirm that no
+/// branch can be pushed out of range, or schedule the pass earlier.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-late-peephole"
+#define RISCV_LATE_PEEPHOLE_NAME "RISC-V Late Stage Peephole"
+
+namespace {
+
+/// Machine-function pass that folds blocks consisting solely of a BEQ/BNE
+/// whose two register operands are identical.
+struct RISCVLatePeepholeOpt : public MachineFunctionPass {
+  static char ID;
+
+  RISCVLatePeepholeOpt() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override { return RISCV_LATE_PEEPHOLE_NAME; }
+
+  // The pass rewrites the CFG without updating any machine analysis, so it
+  // requests and preserves nothing beyond the MachineFunctionPass defaults.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+private:
+  void removeBlock(MachineBasicBlock *B, MachineBasicBlock *NewB);
+  bool removeSingleBranchBlock(MachineBasicBlock *B);
+
+  const RISCVInstrInfo *RII = nullptr; // Instruction info of the subtarget.
+  MachineFunction *MFN = nullptr;      // Function currently being processed.
+};
+} // namespace
+
+char RISCVLatePeepholeOpt::ID = 0;
+INITIALIZE_PASS(RISCVLatePeepholeOpt, "riscv-late-peephole",
+                RISCV_LATE_PEEPHOLE_NAME, false, false)
+
+/// Erase \p B after its instructions and outgoing edges have been moved into
+/// \p NewB, making every remaining predecessor of \p B target \p NewB instead.
+void RISCVLatePeepholeOpt::removeBlock(MachineBasicBlock *B,
+                                       MachineBasicBlock *NewB) {
+  LLVM_DEBUG(dbgs() << "Removing block '#'" << B->getNumber() << "\n");
+
+  // B is about to disappear, so it must not keep any outgoing CFG edge.
+  while (!B->succ_empty())
+    B->removeSuccessor(B->succ_begin());
+
+  // Redirecting an edge edits B's predecessor list, so walk a snapshot of it
+  // rather than the live range.
+  SmallVector<MachineBasicBlock *, 4> Preds(B->pred_begin(), B->pred_end());
+  for (MachineBasicBlock *Pred : Preds) {
+    // Rewrite terminator operands that name B and move the CFG edge, together
+    // with its probability, over to NewB.
+    if (Pred->isSuccessor(B))
+      Pred->ReplaceUsesOfBlockWith(B, NewB);
+  }
+
+  // Jump tables reference blocks as well.
+  if (MachineJumpTableInfo *JTI = MFN->getJumpTableInfo())
+    JTI->ReplaceMBBInJumpTables(B, NewB);
+
+  MFN->erase(B->getIterator());
+}
+
+/// Fold \p B if its only instruction is a BEQ/BNE comparing a register with
+/// itself. The branch is removed and the block that control always reaches
+/// next is merged into \p B and erased.
+///
+/// \returns true if the function was changed.
+bool RISCVLatePeepholeOpt::removeSingleBranchBlock(MachineBasicBlock *B) {
+  LLVM_DEBUG(dbgs() << "Checking flow pattern at " << printMBBReference(*B)
+                    << "\n");
+
+  if (B->size() != 1)
+    return false;
+
+  MachineBasicBlock::const_iterator T1I = B->getFirstTerminator();
+  if (T1I == B->end())
+    return false;
+  unsigned Opc = T1I->getOpcode();
+  if (Opc != RISCV::BEQ && Opc != RISCV::BNE)
+    return false;
+
+  // Only a comparison of a register with itself has a known outcome.
+  Register DstReg = T1I->getOperand(0).getReg();
+  Register SrcReg = T1I->getOperand(1).getReg();
+  if (DstReg != SrcReg)
+    return false;
+
+  assert(std::next(T1I) == B->end());
+
+  // BEQ rs, rs is always taken; BNE rs, rs always continues with the layout
+  // successor (null if B is the last block).
+  MachineBasicBlock *PredB = B;
+  MachineBasicBlock *SuccB = Opc == RISCV::BEQ ? T1I->getOperand(2).getMBB()
+                                               : B->getNextNode();
+
+  // Nothing to merge with, or B only branches to itself.
+  if (!SuccB || SuccB == PredB)
+    return false;
+
+  // SuccB is erased below, so it must not be the entry block, have its
+  // address taken, or be an exception-handling pad.
+  if (SuccB == &MFN->front() || SuccB->hasAddressTaken() || SuccB->isEHPad())
+    return false;
+
+  // SuccB's instructions move to B's position. If SuccB falls through, B must
+  // end up with the same layout successor, which only holds when SuccB
+  // directly follows B.
+  if (SuccB->canFallThrough() && PredB->getNextNode() != SuccB)
+    return false;
+
+  // A block falling through into SuccB will fall into SuccB's layout
+  // successor once SuccB is erased; that is only correct if it is B.
+  MachineBasicBlock *LayoutPred = SuccB->getPrevNode();
+  if (LayoutPred && LayoutPred != PredB && LayoutPred->canFallThrough() &&
+      SuccB->getNextNode() != PredB)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Merging blocks '#'" << PredB->getNumber() << " and '#'"
+                    << SuccB->getNumber() << "\n");
+
+  RII->removeBranch(*PredB);
+
+  // B is empty now: drop all of its edges before it inherits SuccB's.
+  while (!PredB->succ_empty())
+    PredB->removeSuccessor(PredB->succ_begin());
+
+  PredB->splice(PredB->end(), SuccB, SuccB->begin(), SuccB->end());
+  PredB->transferSuccessors(SuccB);
+  removeBlock(SuccB, PredB);
+  return true;
+}
+
+/// Try to fold every basic block of \p Fn; returns true if any was folded.
+bool RISCVLatePeepholeOpt::runOnMachineFunction(MachineFunction &Fn) {
+  if (skipFunction(Fn.getFunction()))
+    return false;
+
+  RII = Fn.getSubtarget<RISCVSubtarget>().getInstrInfo();
+  MFN = &Fn;
+
+  bool Changed = false;
+
+  // A successful fold erases a block other than MBB itself, which leaves the
+  // iterator of this loop valid.
+  for (MachineBasicBlock &MBB : Fn)
+    Changed |= removeSingleBranchBlock(&MBB);
+
+  return Changed;
+}
+
+/// Returns an instance of the RISC-V late stage peephole pass.
+FunctionPass *llvm::createRISCVLatePeepholeOptPass() {
+  return new RISCVLatePeepholeOpt();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index fa507653264cc..4e431fef4a82d 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -124,6 +124,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   initializeRISCVPostLegalizerCombinerPass(*PR);
   initializeKCFIPass(*PR);
   initializeRISCVDeadRegisterDefinitionsPass(*PR);
+  initializeRISCVLatePeepholeOptPass(*PR);
   initializeRISCVMakeCompressibleOptPass(*PR);
   initializeRISCVGatherScatterLoweringPass(*PR);
   initializeRISCVCodeGenPreparePass(*PR);
@@ -552,6 +553,7 @@ void RISCVPassConfig::addPreEmitPass() {
       EnableRISCVCopyPropagation)
     addPass(createMachineCopyPropagationPass(true));
   addPass(&BranchRelaxationPassID);
+  addPass(createRISCVLatePeepholeOptPass());
   addPass(createRISCVMakeCompressibleOptPass());
 }
 

From ed5cde82fd0d06d8499fe556b822c6b4c61d4282 Mon Sep 17 00:00:00 2001
From: "Mikhail R. Gadelha"
Date: Wed, 20 Nov 2024 18:28:01 -0300
Subject: [PATCH 2/3] Added test

Signed-off-by: Mikhail R. Gadelha
---
 llvm/test/CodeGen/RISCV/beqz-zero.ll | 61 ++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/beqz-zero.ll

diff --git a/llvm/test/CodeGen/RISCV/beqz-zero.ll b/llvm/test/CodeGen/RISCV/beqz-zero.ll
new file mode 100644
index 0000000000000..fcb909941b562
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/beqz-zero.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32 %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV64 %s
+
+
+define i32 @foo(i32 %0, i1 %1) {
+; RV32-LABEL: foo:
+; RV32:       # %bb.0:
+; RV32-NEXT:    andi a1, a1, 1
+; RV32-NEXT:    beqz a1, .LBB0_3
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    not a0, a0
+; RV32-NEXT:    slli a0, a0, 18
+; RV32-NEXT:    srli a0, a0, 31
+; RV32-NEXT:    bnez a0, .LBB0_4
+; RV32-NEXT:  .LBB0_2:
+; RV32-NEXT:    lui a0, 912096
+; RV32-NEXT:    addi a0, a0, -1330
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB0_3:
+; RV32-NEXT:    beqz zero, .LBB0_2
+; RV32-NEXT:  .LBB0_4:
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: foo:
+; RV64:       # %bb.0:
+; RV64-NEXT:    andi a1, a1, 1
+; RV64-NEXT:    beqz a1, .LBB0_3
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    not a0, a0
+; RV64-NEXT:    slli a0, a0, 50
+; RV64-NEXT:    srli a0, a0, 63
+; RV64-NEXT:    bnez a0, .LBB0_4
+; RV64-NEXT:  .LBB0_2:
+; RV64-NEXT:    lui a0, 912096
+; RV64-NEXT:    addiw a0, a0, -1330
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB0_3:
+; RV64-NEXT:    beqz zero, .LBB0_2
+; RV64-NEXT:  .LBB0_4:
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+  br i1 %1, label %3, label %7
+
+3:                                                ; preds = %2
+  %4 = lshr i32 %0, 13
+  %5 = and i32 %4, 1
+  %6 = xor i32 %5, 1
+  br label %7
+
+7:                                                ; preds = %3, %2
+  %8 = phi i32 [ %6, %3 ], [ 0, %2 ]
+  %9 = icmp eq i32 %8, 0
+  %10 = select i1 %9, i32 -559023410, i32 1
+  ret i32 %10
+}
+
+

From f75f6a174387487d99d1c98a44bdbd56fec6fc6b Mon Sep 17 00:00:00 2001
From: "Mikhail R. Gadelha"
Date: Wed, 20 Nov 2024 18:30:53 -0300
Subject: [PATCH 3/3] Update test case with new opt

Signed-off-by: Mikhail R. Gadelha
---
 llvm/test/CodeGen/RISCV/beqz-zero.ll | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/beqz-zero.ll b/llvm/test/CodeGen/RISCV/beqz-zero.ll
index fcb909941b562..adc48a7ec8acb 100644
--- a/llvm/test/CodeGen/RISCV/beqz-zero.ll
+++ b/llvm/test/CodeGen/RISCV/beqz-zero.ll
@@ -15,12 +15,10 @@ define i32 @foo(i32 %0, i1 %1) {
 ; RV32-NEXT:    slli a0, a0, 18
 ; RV32-NEXT:    srli a0, a0, 31
 ; RV32-NEXT:    bnez a0, .LBB0_4
-; RV32-NEXT:  .LBB0_2:
+; RV32-NEXT:  .LBB0_3:
 ; RV32-NEXT:    lui a0, 912096
 ; RV32-NEXT:    addi a0, a0, -1330
 ; RV32-NEXT:    ret
-; RV32-NEXT:  .LBB0_3:
-; RV32-NEXT:    beqz zero, .LBB0_2
 ; RV32-NEXT:  .LBB0_4:
 ; RV32-NEXT:    li a0, 1
 ; RV32-NEXT:    ret
@@ -34,12 +32,10 @@ define i32 @foo(i32 %0, i1 %1) {
 ; RV64-NEXT:    slli a0, a0, 50
 ; RV64-NEXT:    srli a0, a0, 63
 ; RV64-NEXT:    bnez a0, .LBB0_4
-; RV64-NEXT:  .LBB0_2:
+; RV64-NEXT:  .LBB0_3:
 ; RV64-NEXT:    lui a0, 912096
 ; RV64-NEXT:    addiw a0, a0, -1330
 ; RV64-NEXT:    ret
-; RV64-NEXT:  .LBB0_3:
-; RV64-NEXT:    beqz zero, .LBB0_2
 ; RV64-NEXT:  .LBB0_4:
 ; RV64-NEXT:    li a0, 1
 ; RV64-NEXT:    ret