-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[VPlan] Enforce that there is only ever one header mask. NFC #152489
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
We almost only ever have one header mask, except with the data tail folding style, i.e. with VPInstruction::ActiveLaneMask. All we need to do is to make sure to erase the old header icmp based header mask when replacing it.
|
@llvm/pr-subscribers-llvm-transforms Author: Luke Lau (lukel97) ChangesWe almost only ever have one header mask, except with the data tail All we need to do is to make sure to erase the old header icmp based Full diff: https://github.com/llvm/llvm-project/pull/152489.diff 1 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 1c8bd6c7eefc0..ae2aae590873c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2039,11 +2039,11 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
return LaneMaskPhi;
}
-/// Collect all VPValues representing a header mask through the (ICMP_ULE,
-/// WideCanonicalIV, backedge-taken-count) pattern.
+/// Collect the header mask with the pattern:
+/// (ICMP_ULE, WideCanonicalIV, backedge-taken-count)
/// TODO: Introduce explicit recipe for header-mask instead of searching
/// for the header-mask pattern manually.
-static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
+static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) {
SmallVector<VPValue *> WideCanonicalIVs;
auto *FoundWidenCanonicalIVUser =
find_if(Plan.getCanonicalIV()->users(),
@@ -2069,7 +2069,7 @@ static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
// Walk users of wide canonical IVs and collect to all compares of the form
// (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
- SmallVector<VPValue *> HeaderMasks;
+ SmallVector<VPSingleDefRecipe *> HeaderMasks;
for (auto *Wide : WideCanonicalIVs) {
for (VPUser *U : SmallVector<VPUser *>(Wide->users())) {
auto *HeaderMask = dyn_cast<VPInstruction>(U);
@@ -2081,7 +2081,10 @@ static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
HeaderMasks.push_back(HeaderMask);
}
}
- return HeaderMasks;
+ assert(HeaderMasks.size() <= 1 && "Multiple header masks found?");
+ if (HeaderMasks.empty())
+ return nullptr;
+ return HeaderMasks[0];
}
void VPlanTransforms::addActiveLaneMask(
@@ -2097,6 +2100,7 @@ void VPlanTransforms::addActiveLaneMask(
[](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
assert(FoundWidenCanonicalIVUser &&
"Must have widened canonical IV when tail folding!");
+ VPSingleDefRecipe *HeaderMask = findHeaderMask(Plan);
auto *WideCanonicalIV =
cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
VPSingleDefRecipe *LaneMask;
@@ -2113,8 +2117,8 @@ void VPlanTransforms::addActiveLaneMask(
// Walk users of WideCanonicalIV and replace all compares of the form
// (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an
// active-lane-mask.
- for (VPValue *HeaderMask : collectAllHeaderMasks(Plan))
- HeaderMask->replaceAllUsesWith(LaneMask);
+ HeaderMask->replaceAllUsesWith(LaneMask);
+ HeaderMask->eraseFromParent();
}
/// Try to optimize a \p CurRecipe masked by \p HeaderMask to a corresponding
@@ -2242,8 +2246,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
}
}
- // Try to optimize header mask recipes away to their EVL variants.
- for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
+ // Try to optimize recipes which use the header mask to their EVL variants.
+ if (VPValue *HeaderMask = findHeaderMask(Plan)) {
// TODO: Split optimizeMaskToEVL out and move into
// VPlanTransforms::optimize. transformRecipestoEVLRecipes should be run in
// tryToBuildVPlanWithVPRecipes beforehand.
@@ -2269,7 +2273,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
ToErase.push_back(CurRecipe);
}
- // Replace header masks with a mask equivalent to predicating by EVL:
+ // Replace the header mask with a mask equivalent to predicating by EVL:
//
// icmp ule widen-canonical-iv backedge-taken-count
// ->
|
|
@llvm/pr-subscribers-vectorizers Author: Luke Lau (lukel97) ChangesWe almost only ever have one header mask, except with the data tail All we need to do is to make sure to erase the old header icmp based Full diff: https://github.com/llvm/llvm-project/pull/152489.diff 1 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 1c8bd6c7eefc0..ae2aae590873c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2039,11 +2039,11 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
return LaneMaskPhi;
}
-/// Collect all VPValues representing a header mask through the (ICMP_ULE,
-/// WideCanonicalIV, backedge-taken-count) pattern.
+/// Collect the header mask with the pattern:
+/// (ICMP_ULE, WideCanonicalIV, backedge-taken-count)
/// TODO: Introduce explicit recipe for header-mask instead of searching
/// for the header-mask pattern manually.
-static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
+static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) {
SmallVector<VPValue *> WideCanonicalIVs;
auto *FoundWidenCanonicalIVUser =
find_if(Plan.getCanonicalIV()->users(),
@@ -2069,7 +2069,7 @@ static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
// Walk users of wide canonical IVs and collect to all compares of the form
// (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
- SmallVector<VPValue *> HeaderMasks;
+ SmallVector<VPSingleDefRecipe *> HeaderMasks;
for (auto *Wide : WideCanonicalIVs) {
for (VPUser *U : SmallVector<VPUser *>(Wide->users())) {
auto *HeaderMask = dyn_cast<VPInstruction>(U);
@@ -2081,7 +2081,10 @@ static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
HeaderMasks.push_back(HeaderMask);
}
}
- return HeaderMasks;
+ assert(HeaderMasks.size() <= 1 && "Multiple header masks found?");
+ if (HeaderMasks.empty())
+ return nullptr;
+ return HeaderMasks[0];
}
void VPlanTransforms::addActiveLaneMask(
@@ -2097,6 +2100,7 @@ void VPlanTransforms::addActiveLaneMask(
[](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
assert(FoundWidenCanonicalIVUser &&
"Must have widened canonical IV when tail folding!");
+ VPSingleDefRecipe *HeaderMask = findHeaderMask(Plan);
auto *WideCanonicalIV =
cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
VPSingleDefRecipe *LaneMask;
@@ -2113,8 +2117,8 @@ void VPlanTransforms::addActiveLaneMask(
// Walk users of WideCanonicalIV and replace all compares of the form
// (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an
// active-lane-mask.
- for (VPValue *HeaderMask : collectAllHeaderMasks(Plan))
- HeaderMask->replaceAllUsesWith(LaneMask);
+ HeaderMask->replaceAllUsesWith(LaneMask);
+ HeaderMask->eraseFromParent();
}
/// Try to optimize a \p CurRecipe masked by \p HeaderMask to a corresponding
@@ -2242,8 +2246,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
}
}
- // Try to optimize header mask recipes away to their EVL variants.
- for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
+ // Try to optimize recipes which use the header mask to their EVL variants.
+ if (VPValue *HeaderMask = findHeaderMask(Plan)) {
// TODO: Split optimizeMaskToEVL out and move into
// VPlanTransforms::optimize. transformRecipestoEVLRecipes should be run in
// tryToBuildVPlanWithVPRecipes beforehand.
@@ -2269,7 +2273,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
ToErase.push_back(CurRecipe);
}
- // Replace header masks with a mask equivalent to predicating by EVL:
+ // Replace the header mask with a mask equivalent to predicating by EVL:
//
// icmp ule widen-canonical-iv backedge-taken-count
// ->
|
| [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); }); | ||
| assert(FoundWidenCanonicalIVUser && | ||
| "Must have widened canonical IV when tail folding!"); | ||
| VPSingleDefRecipe *HeaderMask = findHeaderMask(Plan); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need to hoist the call upwards here because once we create the VPInstruction::ActiveLaneMask the assert is violated
| // Try to optimize header mask recipes away to their EVL variants. | ||
| for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) { | ||
| // Try to optimize recipes which use the header mask to their EVL variants. | ||
| if (VPValue *HeaderMask = findHeaderMask(Plan)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a situation where there is no header mask?
I think it would be better to use an assert here if a header mask always exists when do addExplicitVectorLength.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It can be optimized away if there are no users and it's dead. There were a few RISC-V test cases where this happens
…ze/single-header-mask
| // tryToBuildVPlanWithVPRecipes. | ||
| for (VPRecipeBase *R : reverse(ToErase)) { | ||
| SmallVector<VPValue *> PossiblyDead(R->operands()); | ||
| R->eraseFromParent(); | ||
| for (VPValue *Op : PossiblyDead) | ||
| recursivelyDeleteDeadRecipes(Op); | ||
| } | ||
| return; | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Once we move the variable step transform into tryToBuildVPlanWithVPRecipes then we can just rely on the removeDeadRecipes pass.
| // Walk users of wide canonical IVs and collect to all compares of the form | ||
| // (ICMP_ULE, WideCanonicalIV, backedge-taken-count). | ||
| SmallVector<VPValue *> HeaderMasks; | ||
| SmallVector<VPSingleDefRecipe *> HeaderMasks; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why need an array here? Should be anough single pointer and an assert in the loop
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point, fixed in ff6d700
| } | ||
|
|
||
| VPValue *HeaderMask = findHeaderMask(Plan); | ||
| if (!HeaderMask) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we do
VPValue *HeaderMask = findHeaderMask(Plan);
if (HeaderMask) {
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
......
}
if (HeaderMask->getNumUsers() != 0) {
VPValue *EVLMask = Builder.createICmp(...);
HeaderMask->replaceAllUsesWith(EVLMask);
ToErase.push_back(HeaderMask->getDefiningRecipe());
}
}
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could, I chose the early exit just to minimize the diff. We can actually just leave the old FOR splice behind anyway, there's another run of simplifyDeadRecipes after this: f7b797e
…ze/single-header-mask
It'll get cleaned up by removeDeadRecipes anyway
|
|
||
| // Walk users of WideCanonicalIV and replace all compares of the form | ||
| // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an | ||
| // active-lane-mask. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| // active-lane-mask. | |
| // active-lane-mask and remove the old one to ensure that there always is only a single header mask. |
fhahn
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks
We almost only ever have one header mask, except with the data tail folding style, i.e. with VPInstruction::ActiveLaneMask.
All we need to do is to make sure to erase the old header icmp based header mask when replacing it.