@@ -24367,135 +24367,134 @@ class HorizontalReduction {
2436724367 VectorizedTree = GetNewVectorizedTree(
2436824368 VectorizedTree,
2436924369 emitReduction(Builder, *TTI, ReductionRoot->getType()));
24370- if (VectorizedTree) {
24371- // Reorder operands of bool logical op in the natural order to avoid
24372- // possible problem with poison propagation. If not possible to reorder
24373- // (both operands are originally RHS), emit an extra freeze instruction
24374- // for the LHS operand.
24375- // I.e., if we have original code like this:
24376- // RedOp1 = select i1 ?, i1 LHS, i1 false
24377- // RedOp2 = select i1 RHS, i1 ?, i1 false
24378-
24379- // Then, we swap LHS/RHS to create a new op that matches the poison
24380- // semantics of the original code.
24381-
24382- // If we have original code like this and both values could be poison:
24383- // RedOp1 = select i1 ?, i1 LHS, i1 false
24384- // RedOp2 = select i1 ?, i1 RHS, i1 false
24385-
24386- // Then, we must freeze LHS in the new op.
24387- auto FixBoolLogicalOps = [&, VectorizedTree](Value *&LHS, Value *&RHS,
24388- Instruction *RedOp1,
24389- Instruction *RedOp2,
24390- bool InitStep) {
24391- if (!AnyBoolLogicOp)
24392- return;
24393- if (isBoolLogicOp(RedOp1) && ((!InitStep && LHS == VectorizedTree) ||
24394- getRdxOperand(RedOp1, 0) == LHS ||
24395- isGuaranteedNotToBePoison(LHS, AC)))
24396- return;
24397- if (isBoolLogicOp(RedOp2) && ((!InitStep && RHS == VectorizedTree) ||
24398- getRdxOperand(RedOp2, 0) == RHS ||
24399- isGuaranteedNotToBePoison(RHS, AC))) {
24400- std::swap(LHS, RHS);
24401- return;
24402- }
24403- if (LHS != VectorizedTree)
24404- LHS = Builder.CreateFreeze(LHS);
24405- };
24406- // Finish the reduction.
24407- // Need to add extra arguments and not vectorized possible reduction
24408- // values.
24409- // Try to avoid dependencies between the scalar remainders after
24410- // reductions.
24411- auto FinalGen =
24412- [&](ArrayRef<std::pair<Instruction *, Value *>> InstVals,
24413- bool InitStep) {
24414- unsigned Sz = InstVals.size();
24415- SmallVector<std::pair<Instruction *, Value *>> ExtraReds(Sz / 2 +
24416- Sz % 2);
24417- for (unsigned I = 0, E = (Sz / 2) * 2; I < E; I += 2) {
24418- Instruction *RedOp = InstVals[I + 1].first;
24419- Builder.SetCurrentDebugLocation(RedOp->getDebugLoc());
24420- Value *RdxVal1 = InstVals[I].second;
24421- Value *StableRdxVal1 = RdxVal1;
24422- auto It1 = TrackedVals.find(RdxVal1);
24423- if (It1 != TrackedVals.end())
24424- StableRdxVal1 = It1->second;
24425- Value *RdxVal2 = InstVals[I + 1].second;
24426- Value *StableRdxVal2 = RdxVal2;
24427- auto It2 = TrackedVals.find(RdxVal2);
24428- if (It2 != TrackedVals.end())
24429- StableRdxVal2 = It2->second;
24430- // To prevent poison from leaking across what used to be
24431- // sequential, safe, scalar boolean logic operations, the
24432- // reduction operand must be frozen.
24433- FixBoolLogicalOps(StableRdxVal1, StableRdxVal2, InstVals[I].first,
24434- RedOp, InitStep);
24435- Value *ExtraRed = createOp(Builder, RdxKind, StableRdxVal1,
24436- StableRdxVal2, "op.rdx", ReductionOps);
24437- ExtraReds[I / 2] = std::make_pair(InstVals[I].first, ExtraRed);
24438- }
24439- if (Sz % 2 == 1)
24440- ExtraReds[Sz / 2] = InstVals.back();
24441- return ExtraReds;
24442- };
24443- SmallVector<std::pair<Instruction *, Value *>> ExtraReductions;
24444- ExtraReductions.emplace_back(cast<Instruction>(ReductionRoot),
24445- VectorizedTree);
24446- SmallPtrSet<Value *, 8> Visited;
24447- for (ArrayRef<Value *> Candidates : ReducedVals) {
24448- for (Value *RdxVal : Candidates) {
24449- if (!Visited.insert(RdxVal).second)
24450- continue;
24451- unsigned NumOps = VectorizedVals.lookup(RdxVal);
24452- for (Instruction *RedOp :
24453- ArrayRef(ReducedValsToOps.at(RdxVal)).drop_back(NumOps))
24454- ExtraReductions.emplace_back(RedOp, RdxVal);
24455- }
24370+
24371+ if (!VectorizedTree) {
24372+ if (!CheckForReusedReductionOps) {
24373+ for (ReductionOpsType &RdxOps : ReductionOps)
24374+ for (Value *RdxOp : RdxOps)
24375+ V.analyzedReductionRoot(cast<Instruction>(RdxOp));
2445624376 }
24457- // Iterate through all not-vectorized reduction values/extra arguments.
24458- bool InitStep = true;
24459- while (ExtraReductions.size() > 1) {
24460- SmallVector<std::pair<Instruction *, Value *>> NewReds =
24461- FinalGen(ExtraReductions, InitStep);
24462- ExtraReductions.swap(NewReds);
24463- InitStep = false;
24377+ return nullptr;
24378+ }
24379+
24380+ // Reorder operands of bool logical op in the natural order to avoid
24381+ // possible problem with poison propagation. If not possible to reorder
24382+ // (both operands are originally RHS), emit an extra freeze instruction
24383+ // for the LHS operand.
24384+ // I.e., if we have original code like this:
24385+ // RedOp1 = select i1 ?, i1 LHS, i1 false
24386+ // RedOp2 = select i1 RHS, i1 ?, i1 false
24387+
24388+ // Then, we swap LHS/RHS to create a new op that matches the poison
24389+ // semantics of the original code.
24390+
24391+ // If we have original code like this and both values could be poison:
24392+ // RedOp1 = select i1 ?, i1 LHS, i1 false
24393+ // RedOp2 = select i1 ?, i1 RHS, i1 false
24394+
24395+ // Then, we must freeze LHS in the new op.
24396+ auto FixBoolLogicalOps =
24397+ [&, VectorizedTree](Value *&LHS, Value *&RHS, Instruction *RedOp1,
24398+ Instruction *RedOp2, bool InitStep) {
24399+ if (!AnyBoolLogicOp)
24400+ return;
24401+ if (isBoolLogicOp(RedOp1) && ((!InitStep && LHS == VectorizedTree) ||
24402+ getRdxOperand(RedOp1, 0) == LHS ||
24403+ isGuaranteedNotToBePoison(LHS, AC)))
24404+ return;
24405+ if (isBoolLogicOp(RedOp2) && ((!InitStep && RHS == VectorizedTree) ||
24406+ getRdxOperand(RedOp2, 0) == RHS ||
24407+ isGuaranteedNotToBePoison(RHS, AC))) {
24408+ std::swap(LHS, RHS);
24409+ return;
24410+ }
24411+ if (LHS != VectorizedTree)
24412+ LHS = Builder.CreateFreeze(LHS);
24413+ };
24414+ // Finish the reduction.
24415+ // Need to add extra arguments and not vectorized possible reduction values.
24416+ // Try to avoid dependencies between the scalar remainders after reductions.
24417+ auto FinalGen = [&](ArrayRef<std::pair<Instruction *, Value *>> InstVals,
24418+ bool InitStep) {
24419+ unsigned Sz = InstVals.size();
24420+ SmallVector<std::pair<Instruction *, Value *>> ExtraReds(Sz / 2 + Sz % 2);
24421+ for (unsigned I = 0, E = (Sz / 2) * 2; I < E; I += 2) {
24422+ Instruction *RedOp = InstVals[I + 1].first;
24423+ Builder.SetCurrentDebugLocation(RedOp->getDebugLoc());
24424+ Value *RdxVal1 = InstVals[I].second;
24425+ Value *StableRdxVal1 = RdxVal1;
24426+ auto It1 = TrackedVals.find(RdxVal1);
24427+ if (It1 != TrackedVals.end())
24428+ StableRdxVal1 = It1->second;
24429+ Value *RdxVal2 = InstVals[I + 1].second;
24430+ Value *StableRdxVal2 = RdxVal2;
24431+ auto It2 = TrackedVals.find(RdxVal2);
24432+ if (It2 != TrackedVals.end())
24433+ StableRdxVal2 = It2->second;
24434+ // To prevent poison from leaking across what used to be sequential,
24435+ // safe, scalar boolean logic operations, the reduction operand must be
24436+ // frozen.
24437+ FixBoolLogicalOps(StableRdxVal1, StableRdxVal2, InstVals[I].first,
24438+ RedOp, InitStep);
24439+ Value *ExtraRed = createOp(Builder, RdxKind, StableRdxVal1,
24440+ StableRdxVal2, "op.rdx", ReductionOps);
24441+ ExtraReds[I / 2] = std::make_pair(InstVals[I].first, ExtraRed);
24442+ }
24443+ if (Sz % 2 == 1)
24444+ ExtraReds[Sz / 2] = InstVals.back();
24445+ return ExtraReds;
24446+ };
24447+ SmallVector<std::pair<Instruction *, Value *>> ExtraReductions;
24448+ ExtraReductions.emplace_back(cast<Instruction>(ReductionRoot),
24449+ VectorizedTree);
24450+ SmallPtrSet<Value *, 8> Visited;
24451+ for (ArrayRef<Value *> Candidates : ReducedVals) {
24452+ for (Value *RdxVal : Candidates) {
24453+ if (!Visited.insert(RdxVal).second)
24454+ continue;
24455+ unsigned NumOps = VectorizedVals.lookup(RdxVal);
24456+ for (Instruction *RedOp :
24457+ ArrayRef(ReducedValsToOps.at(RdxVal)).drop_back(NumOps))
24458+ ExtraReductions.emplace_back(RedOp, RdxVal);
2446424459 }
24465- VectorizedTree = ExtraReductions.front().second;
24460+ }
24461+ // Iterate through all not-vectorized reduction values/extra arguments.
24462+ bool InitStep = true;
24463+ while (ExtraReductions.size() > 1) {
24464+ SmallVector<std::pair<Instruction *, Value *>> NewReds =
24465+ FinalGen(ExtraReductions, InitStep);
24466+ ExtraReductions.swap(NewReds);
24467+ InitStep = false;
24468+ }
24469+ VectorizedTree = ExtraReductions.front().second;
2446624470
24467- ReductionRoot->replaceAllUsesWith(VectorizedTree);
24471+ ReductionRoot->replaceAllUsesWith(VectorizedTree);
2446824472
24469- // The original scalar reduction is expected to have no remaining
24470- // uses outside the reduction tree itself. Assert that we got this
24471- // correct, replace internal uses with undef, and mark for eventual
24472- // deletion.
24473+ // The original scalar reduction is expected to have no remaining
24474+ // uses outside the reduction tree itself. Assert that we got this
24475+ // correct, replace internal uses with undef, and mark for eventual
24476+ // deletion.
2447324477#ifndef NDEBUG
24474- SmallPtrSet<Value *, 4> IgnoreSet;
24475- for (ArrayRef<Value *> RdxOps : ReductionOps)
24476- IgnoreSet.insert_range(RdxOps);
24478+ SmallPtrSet<Value *, 4> IgnoreSet;
24479+ for (ArrayRef<Value *> RdxOps : ReductionOps)
24480+ IgnoreSet.insert_range(RdxOps);
2447724481#endif
24478- for (ArrayRef<Value *> RdxOps : ReductionOps) {
24479- for (Value *Ignore : RdxOps) {
24480- if (!Ignore)
24481- continue;
24482+ for (ArrayRef<Value *> RdxOps : ReductionOps) {
24483+ for (Value *Ignore : RdxOps) {
24484+ if (!Ignore)
24485+ continue;
2448224486#ifndef NDEBUG
24483- for (auto *U : Ignore->users()) {
24484- assert(IgnoreSet.count(U) &&
24485- "All users must be either in the reduction ops list.");
24486- }
24487+ for (auto *U : Ignore->users()) {
24488+ assert(IgnoreSet.count(U) &&
24489+ "All users must be either in the reduction ops list.");
24490+ }
2448724491#endif
24488- if (!Ignore->use_empty()) {
24489- Value *P = PoisonValue::get(Ignore->getType());
24490- Ignore->replaceAllUsesWith(P);
24491- }
24492+ if (!Ignore->use_empty()) {
24493+ Value *P = PoisonValue::get(Ignore->getType());
24494+ Ignore->replaceAllUsesWith(P);
2449224495 }
24493- V.removeInstructionsAndOperands(RdxOps, VectorValuesAndScales);
2449424496 }
24495- } else if (!CheckForReusedReductionOps) {
24496- for (ReductionOpsType &RdxOps : ReductionOps)
24497- for (Value *RdxOp : RdxOps)
24498- V.analyzedReductionRoot(cast<Instruction>(RdxOp));
24497+ V.removeInstructionsAndOperands(RdxOps, VectorValuesAndScales);
2449924498 }
2450024499 return VectorizedTree;
2450124500 }
0 commit comments