From ebfd7d0dc423c5784ff818aaf15a0387c93eb04a Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Tue, 15 Jul 2025 16:20:09 -0400 Subject: [PATCH 1/4] [DirectX] Support ConstExpr GEPs - Fixes #150050 - Address the issue of many nested geps - Check for ConstantExpr GEP if we see it check if it needs a global replacement with a new type. Create the new constExpr Gep and set the pointer operand to it. Finally cleanup and remove the old nested geps. --- .../Target/DirectX/DXILDataScalarization.cpp | 73 +++++++++++++++++ .../bugfix_150050_data_scalarize_const_gep.ll | 80 +++++++++++++++++++ 2 files changed, 153 insertions(+) create mode 100644 llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp index d9d9b36d0b739..5ec55a6718b9a 100644 --- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp +++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp @@ -300,11 +300,84 @@ bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) { return replaceDynamicExtractElementInst(EEI); } +static void buildConstExprGEPChain(GetElementPtrInst &GEPI, Value *CurrentPtr, + SmallVector &GEPChain, + IRBuilder<> &Builder) { + // Process the rest of the chain in reverse order (skipping the innermost) + for (int I = GEPChain.size() - 2; I >= 0; I--) { + ConstantExpr *CE = GEPChain[I]; + GetElementPtrInst *GEPInst = + cast(CE->getAsInstruction()); + GEPInst->insertBefore(GEPI.getIterator()); + SmallVector CurrIndices(GEPInst->indices()); + + // Create a new GEP instruction + Type *SourceTy = GEPInst->getSourceElementType(); + CurrentPtr = + Builder.CreateGEP(SourceTy, CurrentPtr, CurrIndices, GEPInst->getName(), + GEPInst->getNoWrapFlags()); + + // If this is the outermost GEP, update the main GEPI + if (I == 0) { + GEPI.setOperand(GEPI.getPointerOperandIndex(), CurrentPtr); + } + + // Clean up the temporary instruction + GEPInst->eraseFromParent(); + } +} + bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { Value *PtrOperand = GEPI.getPointerOperand(); Type *OrigGEPType = GEPI.getSourceElementType(); Type *NewGEPType = OrigGEPType; bool NeedsTransform = false; + // Check if the pointer operand is a ConstantExpr GEP + if (auto *PtrOpGEPCE = dyn_cast(PtrOperand); + PtrOpGEPCE && PtrOpGEPCE->getOpcode() == Instruction::GetElementPtr) { + + // Collect all nested GEPs in the chain + SmallVector GEPChain; + Value *BasePointer = PtrOpGEPCE->getOperand(0); + GEPChain.push_back(PtrOpGEPCE); + + // Walk up the chain to find all nested GEPs and the base pointer + while (auto *NextGEP = dyn_cast(BasePointer)) { + if (NextGEP->getOpcode() != Instruction::GetElementPtr) + break; + + GEPChain.push_back(NextGEP); + BasePointer = NextGEP->getOperand(0); + } + + // Check if the base pointer is a global that needs replacement + if (GlobalVariable *NewGlobal = lookupReplacementGlobal(BasePointer)) { + IRBuilder<> Builder(&GEPI); + + // Create a new GEP for the innermost GEP (last in the chain) + ConstantExpr *InnerGEPCE = GEPChain.back(); + GetElementPtrInst *InnerGEP = + cast(InnerGEPCE->getAsInstruction()); + InnerGEP->insertBefore(GEPI.getIterator()); + + SmallVector Indices(InnerGEP->indices()); + Type *NewGEPType = NewGlobal->getValueType(); + Value *NewInnerGEP = + Builder.CreateGEP(NewGEPType, NewGlobal, Indices, InnerGEP->getName(), + InnerGEP->getNoWrapFlags()); + + // If there's only one GEP in the chain, update the main GEPI directly + if (GEPChain.size() == 1) + GEPI.setOperand(GEPI.getPointerOperandIndex(), NewInnerGEP); + else + // For multiple GEPs, we need to create a chain of GEPs + buildConstExprGEPChain(GEPI, NewInnerGEP, GEPChain, Builder); + + // Clean up the innermost GEP + InnerGEP->eraseFromParent(); + return true; + } + } if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) { NewGEPType = NewGlobal->getValueType(); diff --git a/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll b/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll new file mode 100644 index 0000000000000..156a8e7c5c386 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll @@ -0,0 +1,80 @@ +; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.4-library %s | FileCheck %s --check-prefixes=SCHECK,CHECK +; RUN: opt -S -passes='dxil-data-scalarization,function(scalarizer),dxil-flatten-arrays' -mtriple=dxil-pc-shadermodel6.4-library %s | FileCheck %s --check-prefixes=FCHECK,CHECK + +@aTile = hidden addrspace(3) global [10 x [10 x <4 x i32>]] zeroinitializer, align 16 +@bTile = hidden addrspace(3) global [10 x [10 x i32]] zeroinitializer, align 16 +@cTile = internal global [2 x [2 x <2 x i32>]] zeroinitializer, align 16 +@dTile = internal global [2 x [2 x [2 x <2 x i32>]]] zeroinitializer, align 16 + +define void @CSMain() { +; CHECK-LABEL: define void @CSMain() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[AFRAGPACKED_I_SCALARIZE:%.*]] = alloca [4 x i32], align 16 +; +; SCHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [10 x <4 x i32>], ptr addrspace(3) getelementptr inbounds ([10 x [10 x [4 x i32]]], ptr addrspace(3) @aTile.scalarized, i32 0, i32 1), i32 0, i32 2 +; SCHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(3) [[TMP0]], align 16 +; SCHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[AFRAGPACKED_I_SCALARIZE]], align 16 +; +; FCHECK-NEXT: [[AFRAGPACKED_I_SCALARIZE_I14:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 1 +; FCHECK-NEXT: [[AFRAGPACKED_I_SCALARIZE_I25:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 2 +; FCHECK-NEXT: [[AFRAGPACKED_I_SCALARIZE_I36:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 3 +; FCHECK-NEXT: [[DOTI07:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([400 x i32], ptr addrspace(3) @aTile.scalarized.1dim, i32 0, i32 48), align 16 +; FCHECK-NEXT: [[DOTI119:%.*]] = load i32, ptr addrspace(3) getelementptr ([400 x i32], ptr addrspace(3) @aTile.scalarized.1dim, i32 0, i32 49), align 4 +; FCHECK-NEXT: [[DOTI2211:%.*]] = load i32, ptr addrspace(3) getelementptr ([400 x i32], ptr addrspace(3) @aTile.scalarized.1dim, i32 0, i32 50), align 8 +; FCHECK-NEXT: [[DOTI3313:%.*]] = load i32, ptr addrspace(3) getelementptr ([400 x i32], ptr addrspace(3) @aTile.scalarized.1dim, i32 0, i32 51), align 4 +; FCHECK-NEXT: store i32 [[DOTI07]], ptr [[AFRAGPACKED_I_SCALARIZE]], align 16 +; FCHECK-NEXT: store i32 [[DOTI119]], ptr [[AFRAGPACKED_I_SCALARIZE_I14]], align 4 +; FCHECK-NEXT: store i32 [[DOTI2211]], ptr [[AFRAGPACKED_I_SCALARIZE_I25]], align 8 +; FCHECK-NEXT: store i32 [[DOTI3313]], ptr [[AFRAGPACKED_I_SCALARIZE_I36]], align 4 +; +; CHECK-NEXT: ret void +entry: + %aFragPacked.i = alloca <4 x i32>, align 16 + %0 = load <4 x i32>, ptr addrspace(3) getelementptr inbounds ([10 x <4 x i32>], ptr addrspace(3) getelementptr inbounds ([10 x [10 x <4 x i32>]], ptr addrspace(3) @aTile, i32 0, i32 1), i32 0, i32 2), align 16 + store <4 x i32> %0, ptr %aFragPacked.i, align 16 + ret void +} + +define void @Main() { +; CHECK-LABEL: define void @Main() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[BFRAGPACKED_I:%.*]] = alloca i32, align 16 +; +; SCHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [10 x i32], ptr addrspace(3) getelementptr inbounds ([10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1), i32 0, i32 1 +; SCHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[TMP0]], align 16 +; SCHECK-NEXT: store i32 [[TMP1]], ptr [[BFRAGPACKED_I]], align 16 +; +; FCHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([100 x i32], ptr addrspace(3) @bTile.1dim, i32 0, i32 11), align 16 +; FCHECK-NEXT: store i32 [[TMP0]], ptr [[BFRAGPACKED_I]], align 16 +; +; CHECK-NEXT: ret void +entry: + %bFragPacked.i = alloca i32, align 16 + %0 = load i32, ptr addrspace(3) getelementptr inbounds ([10 x i32], ptr addrspace(3) getelementptr inbounds ([10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1), i32 0, i32 1), align 16 + store i32 %0, ptr %bFragPacked.i, align 16 + ret void +} + +define void @global_nested_geps_3d() { +; CHECK-LABEL: define void @global_nested_geps_3d() { +; SCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr getelementptr inbounds ([2 x <2 x i32>], ptr getelementptr inbounds ([2 x [2 x [2 x i32]]], ptr @cTile.scalarized, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1 +; SCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +; +; FCHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @cTile.scalarized.1dim, i32 0, i32 7), align 4 +; +; CHECK-NEXT: ret void + %1 = load i32, i32* getelementptr inbounds (<2 x i32>, <2 x i32>* getelementptr inbounds ([2 x <2 x i32>], [2 x <2 x i32>]* getelementptr inbounds ([2 x [2 x <2 x i32>]], [2 x [2 x <2 x i32>]]* @cTile, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), align 4 + ret void +} + +define void @global_nested_geps_4d() { +; CHECK-LABEL: define void @global_nested_geps_4d() { +; SCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr getelementptr inbounds ([2 x <2 x i32>], ptr getelementptr inbounds ([2 x [2 x <2 x i32>]], ptr getelementptr inbounds ([2 x [2 x [2 x [2 x i32]]]], ptr @dTile.scalarized, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), i32 0, i32 1 +; SCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +; +; FCHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @dTile.scalarized.1dim, i32 0, i32 15), align 4 +; +; CHECK-NEXT: ret void + %1 = load i32, i32* getelementptr inbounds (<2 x i32>, <2 x i32>* getelementptr inbounds ([2 x <2 x i32>], [2 x <2 x i32>]* getelementptr inbounds ([2 x [2 x <2 x i32>]], [2 x [2 x <2 x i32>]]* getelementptr inbounds ([2 x [2 x [2 x <2 x i32>]]], [2 x [2 x [2 x <2 x i32>]]]* @dTile, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), align 4 + ret void +} From 2c0e73cfd6321c7775b52f067fa986b74659e25d Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Wed, 23 Jul 2025 11:24:50 -0400 Subject: [PATCH 2/4] address pr comment --- .../Target/DirectX/DXILDataScalarization.cpp | 113 ++++-------------- 1 file changed, 26 insertions(+), 87 deletions(-) diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp index 5ec55a6718b9a..07ab86223d4ab 100644 --- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp +++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp @@ -300,83 +300,20 @@ bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) { return replaceDynamicExtractElementInst(EEI); } -static void buildConstExprGEPChain(GetElementPtrInst &GEPI, Value *CurrentPtr, - SmallVector &GEPChain, - IRBuilder<> &Builder) { - // Process the rest of the chain in reverse order (skipping the innermost) - for (int I = GEPChain.size() - 2; I >= 0; I--) { - ConstantExpr *CE = GEPChain[I]; - GetElementPtrInst *GEPInst = - cast(CE->getAsInstruction()); - GEPInst->insertBefore(GEPI.getIterator()); - SmallVector CurrIndices(GEPInst->indices()); - - // Create a new GEP instruction - Type *SourceTy = GEPInst->getSourceElementType(); - CurrentPtr = - Builder.CreateGEP(SourceTy, CurrentPtr, CurrIndices, GEPInst->getName(), - GEPInst->getNoWrapFlags()); - - // If this is the outermost GEP, update the main GEPI - if (I == 0) { - GEPI.setOperand(GEPI.getPointerOperandIndex(), CurrentPtr); - } - - // Clean up the temporary instruction - GEPInst->eraseFromParent(); - } -} - bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { - Value *PtrOperand = GEPI.getPointerOperand(); - Type *OrigGEPType = GEPI.getSourceElementType(); - Type *NewGEPType = OrigGEPType; + GEPOperator *GOp = cast(&GEPI); + Value *PtrOperand = GOp->getPointerOperand(); + Type *NewGEPType = GOp->getSourceElementType(); bool NeedsTransform = false; - // Check if the pointer operand is a ConstantExpr GEP - if (auto *PtrOpGEPCE = dyn_cast(PtrOperand); - PtrOpGEPCE && PtrOpGEPCE->getOpcode() == Instruction::GetElementPtr) { - - // Collect all nested GEPs in the chain - SmallVector GEPChain; - Value *BasePointer = PtrOpGEPCE->getOperand(0); - GEPChain.push_back(PtrOpGEPCE); - - // Walk up the chain to find all nested GEPs and the base pointer - while (auto *NextGEP = dyn_cast(BasePointer)) { - if (NextGEP->getOpcode() != Instruction::GetElementPtr) - break; - - GEPChain.push_back(NextGEP); - BasePointer = NextGEP->getOperand(0); - } - // Check if the base pointer is a global that needs replacement - if (GlobalVariable *NewGlobal = lookupReplacementGlobal(BasePointer)) { - IRBuilder<> Builder(&GEPI); - - // Create a new GEP for the innermost GEP (last in the chain) - ConstantExpr *InnerGEPCE = GEPChain.back(); - GetElementPtrInst *InnerGEP = - cast(InnerGEPCE->getAsInstruction()); - InnerGEP->insertBefore(GEPI.getIterator()); - - SmallVector Indices(InnerGEP->indices()); - Type *NewGEPType = NewGlobal->getValueType(); - Value *NewInnerGEP = - Builder.CreateGEP(NewGEPType, NewGlobal, Indices, InnerGEP->getName(), - InnerGEP->getNoWrapFlags()); - - // If there's only one GEP in the chain, update the main GEPI directly - if (GEPChain.size() == 1) - GEPI.setOperand(GEPI.getPointerOperandIndex(), NewInnerGEP); - else - // For multiple GEPs, we need to create a chain of GEPs - buildConstExprGEPChain(GEPI, NewInnerGEP, GEPChain, Builder); - - // Clean up the innermost GEP - InnerGEP->eraseFromParent(); - return true; - } + // Unwrap GEP ConstantExprs to find the base operand and element type + while (auto *CE = dyn_cast(PtrOperand)) { + if (auto *GEPCE = dyn_cast(CE)) { + GOp = GEPCE; + PtrOperand = GEPCE->getPointerOperand(); + NewGEPType = GEPCE->getSourceElementType(); + } else + break; } if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) { @@ -385,30 +322,32 @@ bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { NeedsTransform = true; } else if (AllocaInst *Alloca = dyn_cast(PtrOperand)) { Type *AllocatedType = Alloca->getAllocatedType(); - // Only transform if the allocated type is an array - if (AllocatedType != OrigGEPType && isa(AllocatedType)) { + if (isa(AllocatedType) && + AllocatedType != GOp->getResultElementType()) { NewGEPType = AllocatedType; NeedsTransform = true; } } - // Scalar geps should remain scalars geps. The dxil-flatten-arrays pass will - // convert these scalar geps into flattened array geps - if (!isa(OrigGEPType)) - NewGEPType = OrigGEPType; - - // Note: We bail if this isn't a gep touched via alloca or global - // transformations if (!NeedsTransform) return false; + // Keep scalar GEPs scalar; dxil-flatten-arrays will do flattening later + if (!isa(GOp->getSourceElementType())) + NewGEPType = GOp->getSourceElementType(); + IRBuilder<> Builder(&GEPI); SmallVector Indices(GEPI.indices()); - Value *NewGEP = Builder.CreateGEP(NewGEPType, PtrOperand, Indices, - GEPI.getName(), GEPI.getNoWrapFlags()); - GEPI.replaceAllUsesWith(NewGEP); - GEPI.eraseFromParent(); + GOp->getName(), GOp->getNoWrapFlags()); + + GOp->replaceAllUsesWith(NewGEP); + + if (auto *CE = dyn_cast(GOp)) + CE->destroyConstant(); + else if (auto *OldGEPI = dyn_cast(GOp)) + OldGEPI->eraseFromParent(); // This will always be true in visit* context + return true; } From d2aab9823b75367a86bb8d4d548e53c90e3aee91 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Wed, 23 Jul 2025 12:01:15 -0400 Subject: [PATCH 3/4] fix typo indicies need to be on GOp not GEPI --- llvm/lib/Target/DirectX/DXILDataScalarization.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp index 07ab86223d4ab..6ed35952df51d 100644 --- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp +++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp @@ -337,7 +337,7 @@ bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { NewGEPType = GOp->getSourceElementType(); IRBuilder<> Builder(&GEPI); - SmallVector Indices(GEPI.indices()); + SmallVector Indices(GOp->indices()); Value *NewGEP = Builder.CreateGEP(NewGEPType, PtrOperand, Indices, GOp->getName(), GOp->getNoWrapFlags()); From 50ac604b3a5767589a023ff71cfaf05337190ba9 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Wed, 23 Jul 2025 15:56:33 -0400 Subject: [PATCH 4/4] remove comment to address pr comment --- llvm/lib/Target/DirectX/DXILDataScalarization.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp index 6ed35952df51d..feecfc0880e25 100644 --- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp +++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp @@ -346,7 +346,7 @@ bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { if (auto *CE = dyn_cast(GOp)) CE->destroyConstant(); else if (auto *OldGEPI = dyn_cast(GOp)) - OldGEPI->eraseFromParent(); // This will always be true in visit* context + OldGEPI->eraseFromParent(); return true; }