Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ FunctionPass *createSIPostRABundlerPass();
FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *);
ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPULateCodeGenPreparePass();
FunctionPass *createAMDGPULateCodeGenPrepareLegacyPass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
FunctionPass *createAMDGPURewriteOutArgumentsPass();
ModulePass *
Expand Down Expand Up @@ -282,6 +282,16 @@ class AMDGPUCodeGenPreparePass
PreservedAnalyses run(Function &, FunctionAnalysisManager &);
};

/// New pass manager wrapper for the AMDGPU late codegen-prepare
/// transformation. The pass is function-scoped: run() is invoked once per
/// function together with that function's analysis manager.
class AMDGPULateCodeGenPreparePass
    : public PassInfoMixin<AMDGPULateCodeGenPreparePass> {
private:
  // Held by reference, so the target machine must outlive this pass object.
  const GCNTargetMachine &TM;

public:
  AMDGPULateCodeGenPreparePass(const GCNTargetMachine &TM) : TM(TM) {}
  PreservedAnalyses run(Function &, FunctionAnalysisManager &);
};

class AMDGPULowerKernelArgumentsPass
: public PassInfoMixin<AMDGPULowerKernelArgumentsPass> {
private:
Expand Down Expand Up @@ -352,8 +362,8 @@ extern char &AMDGPUCodeGenPrepareID;
void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &);
extern char &AMDGPURemoveIncompatibleFunctionsID;

void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
extern char &AMDGPULateCodeGenPrepareID;
void initializeAMDGPULateCodeGenPrepareLegacyPass(PassRegistry &);
extern char &AMDGPULateCodeGenPrepareLegacyID;

FunctionPass *createAMDGPURewriteUndefForPHILegacyPass();
void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &);
Expand Down
110 changes: 68 additions & 42 deletions llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,35 +42,21 @@ static cl::opt<bool>
namespace {

class AMDGPULateCodeGenPrepare
: public FunctionPass,
public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
: public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
Module *Mod = nullptr;
const DataLayout *DL = nullptr;
const GCNSubtarget &ST;

AssumptionCache *AC = nullptr;
UniformityInfo *UA = nullptr;

SmallVector<WeakTrackingVH, 8> DeadInsts;

public:
static char ID;

AMDGPULateCodeGenPrepare() : FunctionPass(ID) {}

StringRef getPassName() const override {
return "AMDGPU IR late optimizations";
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetPassConfig>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<UniformityInfoWrapperPass>();
AU.setPreservesAll();
}

bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;

AMDGPULateCodeGenPrepare(Module &M, const GCNSubtarget &ST,
AssumptionCache *AC, UniformityInfo *UA)
: Mod(&M), DL(&M.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
bool run(Function &F);
bool visitInstruction(Instruction &) { return false; }

// Check if the specified value is at least DWORD aligned.
Expand Down Expand Up @@ -148,23 +134,7 @@ class LiveRegOptimizer {

} // end anonymous namespace

bool AMDGPULateCodeGenPrepare::doInitialization(Module &M) {
Mod = &M;
DL = &Mod->getDataLayout();
return false;
}

bool AMDGPULateCodeGenPrepare::runOnFunction(Function &F) {
if (skipFunction(F))
return false;

const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
const TargetMachine &TM = TPC.getTM<TargetMachine>();
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();

bool AMDGPULateCodeGenPrepare::run(Function &F) {
// "Optimize" the virtual regs that cross basic block boundaries. When
// building the SelectionDAG, vectors of illegal types that cross basic blocks
// will be scalarized and widened, with each scalar living in its
Expand Down Expand Up @@ -505,16 +475,72 @@ bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
return true;
}

INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepare, DEBUG_TYPE,
/// New pass manager entry point.
///
/// Looks up the subtarget for \p F from the stored target machine, fetches the
/// assumption cache and uniformity info from \p FAM, and hands them to the
/// shared AMDGPULateCodeGenPrepare implementation.
///
/// \returns PreservedAnalyses::all() when the implementation reports that it
/// did not change \p F; otherwise a set that keeps only the CFG analyses.
PreservedAnalyses
AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) {
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

  AssumptionCache &AC = FAM.getResult<AssumptionAnalysis>(F);
  UniformityInfo &UI = FAM.getResult<UniformityInfoAnalysis>(F);

  AMDGPULateCodeGenPrepare Impl(*F.getParent(), ST, &AC, &UI);

  // Nothing was rewritten, so every cached analysis result is still valid.
  // Returning none() here would needlessly invalidate all of them.
  if (!Impl.run(F))
    return PreservedAnalyses::all();

  // The function was modified: drop cached analyses except the CFG-level set,
  // which this pass declares as preserved.
  PreservedAnalyses PA = PreservedAnalyses::none();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

class AMDGPULateCodeGenPrepareLegacy : public FunctionPass {
public:
static char ID;

AMDGPULateCodeGenPrepareLegacy() : FunctionPass(ID) {}

StringRef getPassName() const override {
return "AMDGPU IR late optimizations";
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetPassConfig>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<UniformityInfoWrapperPass>();
AU.setPreservesAll();
}

bool runOnFunction(Function &F) override;
};

/// Legacy pass manager entry point.
///
/// Gathers the subtarget, assumption cache and uniformity info for \p F from
/// the legacy analysis passes and forwards them to the shared
/// AMDGPULateCodeGenPrepare implementation.
///
/// \returns whatever the implementation's run() returns, or false when the
/// function is skipped.
bool AMDGPULateCodeGenPrepareLegacy::runOnFunction(Function &F) {
  // Bail out early when the pass framework asks us to skip this function.
  if (skipFunction(F))
    return false;

  // Target machine -> per-function GCN subtarget.
  const GCNSubtarget &Subtarget = getAnalysis<TargetPassConfig>()
                                      .getTM<TargetMachine>()
                                      .getSubtarget<GCNSubtarget>(F);

  // The implementation takes its analyses as pointers.
  AssumptionCache *Assumptions =
      &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  UniformityInfo *Uniformity =
      &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();

  return AMDGPULateCodeGenPrepare(*F.getParent(), Subtarget, Assumptions,
                                  Uniformity)
      .run(F);
}

INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
"AMDGPU IR late optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPULateCodeGenPrepare, DEBUG_TYPE,
INITIALIZE_PASS_END(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
"AMDGPU IR late optimizations", false, false)

char AMDGPULateCodeGenPrepare::ID = 0;
char AMDGPULateCodeGenPrepareLegacy::ID = 0;

FunctionPass *llvm::createAMDGPULateCodeGenPreparePass() {
return new AMDGPULateCodeGenPrepare();
FunctionPass *llvm::createAMDGPULateCodeGenPrepareLegacyPass() {
return new AMDGPULateCodeGenPrepareLegacy();
}
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ FUNCTION_PASS("amdgpu-annotate-uniform", AMDGPUAnnotateUniformValuesPass())
FUNCTION_PASS("amdgpu-codegenprepare", AMDGPUCodeGenPreparePass(*this))
FUNCTION_PASS("amdgpu-image-intrinsic-opt",
AMDGPUImageIntrinsicOptimizerPass(*this))
FUNCTION_PASS("amdgpu-late-codegenprepare",
AMDGPULateCodeGenPreparePass(
*static_cast<const GCNTargetMachine *>(this)))
FUNCTION_PASS("amdgpu-lower-kernel-arguments",
AMDGPULowerKernelArgumentsPass(*this))
FUNCTION_PASS("amdgpu-lower-kernel-attributes",
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUPromoteAllocaPass(*PR);
initializeAMDGPUPromoteAllocaToVectorPass(*PR);
initializeAMDGPUCodeGenPreparePass(*PR);
initializeAMDGPULateCodeGenPreparePass(*PR);
initializeAMDGPULateCodeGenPrepareLegacyPass(*PR);
initializeAMDGPURemoveIncompatibleFunctionsPass(*PR);
initializeAMDGPULowerModuleLDSLegacyPass(*PR);
initializeAMDGPULowerBufferFatPointersPass(*PR);
Expand Down Expand Up @@ -1227,7 +1227,7 @@ bool GCNPassConfig::addPreISel() {
addPass(createSinkingPass());

if (TM->getOptLevel() > CodeGenOptLevel::None)
addPass(createAMDGPULateCodeGenPreparePass());
addPass(createAMDGPULateCodeGenPrepareLegacyPass());

// Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
// regions formed by them.
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-late-codegenprepare %s | FileCheck %s -check-prefix=GFX9
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -amdgpu-late-codegenprepare %s | FileCheck %s -check-prefix=GFX12
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=amdgpu-late-codegenprepare %s | FileCheck %s -check-prefix=GFX9

; Make sure we don't crash when trying to create a bitcast between
; address spaces
Expand Down
Loading