@@ -24435,135 +24435,134 @@ class HorizontalReduction {
2443524435 VectorizedTree = GetNewVectorizedTree(
2443624436 VectorizedTree,
2443724437 emitReduction(Builder, *TTI, ReductionRoot->getType()));
24438- if (VectorizedTree) {
24439- // Reorder operands of bool logical op in the natural order to avoid
24440- // possible problem with poison propagation. If not possible to reorder
24441- // (both operands are originally RHS), emit an extra freeze instruction
24442- // for the LHS operand.
24443- // I.e., if we have original code like this:
24444- // RedOp1 = select i1 ?, i1 LHS, i1 false
24445- // RedOp2 = select i1 RHS, i1 ?, i1 false
24446-
24447- // Then, we swap LHS/RHS to create a new op that matches the poison
24448- // semantics of the original code.
24449-
24450- // If we have original code like this and both values could be poison:
24451- // RedOp1 = select i1 ?, i1 LHS, i1 false
24452- // RedOp2 = select i1 ?, i1 RHS, i1 false
24453-
24454- // Then, we must freeze LHS in the new op.
24455- auto FixBoolLogicalOps = [&, VectorizedTree](Value *&LHS, Value *&RHS,
24456- Instruction *RedOp1,
24457- Instruction *RedOp2,
24458- bool InitStep) {
24459- if (!AnyBoolLogicOp)
24460- return;
24461- if (isBoolLogicOp(RedOp1) && ((!InitStep && LHS == VectorizedTree) ||
24462- getRdxOperand(RedOp1, 0) == LHS ||
24463- isGuaranteedNotToBePoison(LHS, AC)))
24464- return;
24465- if (isBoolLogicOp(RedOp2) && ((!InitStep && RHS == VectorizedTree) ||
24466- getRdxOperand(RedOp2, 0) == RHS ||
24467- isGuaranteedNotToBePoison(RHS, AC))) {
24468- std::swap(LHS, RHS);
24469- return;
24470- }
24471- if (LHS != VectorizedTree)
24472- LHS = Builder.CreateFreeze(LHS);
24473- };
24474- // Finish the reduction.
24475- // Need to add extra arguments and not vectorized possible reduction
24476- // values.
24477- // Try to avoid dependencies between the scalar remainders after
24478- // reductions.
24479- auto FinalGen =
24480- [&](ArrayRef<std::pair<Instruction *, Value *>> InstVals,
24481- bool InitStep) {
24482- unsigned Sz = InstVals.size();
24483- SmallVector<std::pair<Instruction *, Value *>> ExtraReds(Sz / 2 +
24484- Sz % 2);
24485- for (unsigned I = 0, E = (Sz / 2) * 2; I < E; I += 2) {
24486- Instruction *RedOp = InstVals[I + 1].first;
24487- Builder.SetCurrentDebugLocation(RedOp->getDebugLoc());
24488- Value *RdxVal1 = InstVals[I].second;
24489- Value *StableRdxVal1 = RdxVal1;
24490- auto It1 = TrackedVals.find(RdxVal1);
24491- if (It1 != TrackedVals.end())
24492- StableRdxVal1 = It1->second;
24493- Value *RdxVal2 = InstVals[I + 1].second;
24494- Value *StableRdxVal2 = RdxVal2;
24495- auto It2 = TrackedVals.find(RdxVal2);
24496- if (It2 != TrackedVals.end())
24497- StableRdxVal2 = It2->second;
24498- // To prevent poison from leaking across what used to be
24499- // sequential, safe, scalar boolean logic operations, the
24500- // reduction operand must be frozen.
24501- FixBoolLogicalOps(StableRdxVal1, StableRdxVal2, InstVals[I].first,
24502- RedOp, InitStep);
24503- Value *ExtraRed = createOp(Builder, RdxKind, StableRdxVal1,
24504- StableRdxVal2, "op.rdx", ReductionOps);
24505- ExtraReds[I / 2] = std::make_pair(InstVals[I].first, ExtraRed);
24506- }
24507- if (Sz % 2 == 1)
24508- ExtraReds[Sz / 2] = InstVals.back();
24509- return ExtraReds;
24510- };
24511- SmallVector<std::pair<Instruction *, Value *>> ExtraReductions;
24512- ExtraReductions.emplace_back(cast<Instruction>(ReductionRoot),
24513- VectorizedTree);
24514- SmallPtrSet<Value *, 8> Visited;
24515- for (ArrayRef<Value *> Candidates : ReducedVals) {
24516- for (Value *RdxVal : Candidates) {
24517- if (!Visited.insert(RdxVal).second)
24518- continue;
24519- unsigned NumOps = VectorizedVals.lookup(RdxVal);
24520- for (Instruction *RedOp :
24521- ArrayRef(ReducedValsToOps.at(RdxVal)).drop_back(NumOps))
24522- ExtraReductions.emplace_back(RedOp, RdxVal);
24523- }
24438+
24439+ if (!VectorizedTree) {
24440+ if (!CheckForReusedReductionOps) {
24441+ for (ReductionOpsType &RdxOps : ReductionOps)
24442+ for (Value *RdxOp : RdxOps)
24443+ V.analyzedReductionRoot(cast<Instruction>(RdxOp));
2452424444 }
24525- // Iterate through all not-vectorized reduction values/extra arguments.
24526- bool InitStep = true;
24527- while (ExtraReductions.size() > 1) {
24528- SmallVector<std::pair<Instruction *, Value *>> NewReds =
24529- FinalGen(ExtraReductions, InitStep);
24530- ExtraReductions.swap(NewReds);
24531- InitStep = false;
24445+ return nullptr;
24446+ }
24447+
24448+ // Reorder operands of bool logical op in the natural order to avoid
24449+ // possible problem with poison propagation. If not possible to reorder
24450+ // (both operands are originally RHS), emit an extra freeze instruction
24451+ // for the LHS operand.
24452+ // I.e., if we have original code like this:
24453+ // RedOp1 = select i1 ?, i1 LHS, i1 false
24454+ // RedOp2 = select i1 RHS, i1 ?, i1 false
24455+
24456+ // Then, we swap LHS/RHS to create a new op that matches the poison
24457+ // semantics of the original code.
24458+
24459+ // If we have original code like this and both values could be poison:
24460+ // RedOp1 = select i1 ?, i1 LHS, i1 false
24461+ // RedOp2 = select i1 ?, i1 RHS, i1 false
24462+
24463+ // Then, we must freeze LHS in the new op.
24464+ auto FixBoolLogicalOps =
24465+ [&, VectorizedTree](Value *&LHS, Value *&RHS, Instruction *RedOp1,
24466+ Instruction *RedOp2, bool InitStep) {
24467+ if (!AnyBoolLogicOp)
24468+ return;
24469+ if (isBoolLogicOp(RedOp1) && ((!InitStep && LHS == VectorizedTree) ||
24470+ getRdxOperand(RedOp1, 0) == LHS ||
24471+ isGuaranteedNotToBePoison(LHS, AC)))
24472+ return;
24473+ if (isBoolLogicOp(RedOp2) && ((!InitStep && RHS == VectorizedTree) ||
24474+ getRdxOperand(RedOp2, 0) == RHS ||
24475+ isGuaranteedNotToBePoison(RHS, AC))) {
24476+ std::swap(LHS, RHS);
24477+ return;
24478+ }
24479+ if (LHS != VectorizedTree)
24480+ LHS = Builder.CreateFreeze(LHS);
24481+ };
24482+ // Finish the reduction.
24483+ // Need to add extra arguments and not vectorized possible reduction values.
24484+ // Try to avoid dependencies between the scalar remainders after reductions.
24485+ auto FinalGen = [&](ArrayRef<std::pair<Instruction *, Value *>> InstVals,
24486+ bool InitStep) {
24487+ unsigned Sz = InstVals.size();
24488+ SmallVector<std::pair<Instruction *, Value *>> ExtraReds(Sz / 2 + Sz % 2);
24489+ for (unsigned I = 0, E = (Sz / 2) * 2; I < E; I += 2) {
24490+ Instruction *RedOp = InstVals[I + 1].first;
24491+ Builder.SetCurrentDebugLocation(RedOp->getDebugLoc());
24492+ Value *RdxVal1 = InstVals[I].second;
24493+ Value *StableRdxVal1 = RdxVal1;
24494+ auto It1 = TrackedVals.find(RdxVal1);
24495+ if (It1 != TrackedVals.end())
24496+ StableRdxVal1 = It1->second;
24497+ Value *RdxVal2 = InstVals[I + 1].second;
24498+ Value *StableRdxVal2 = RdxVal2;
24499+ auto It2 = TrackedVals.find(RdxVal2);
24500+ if (It2 != TrackedVals.end())
24501+ StableRdxVal2 = It2->second;
24502+ // To prevent poison from leaking across what used to be sequential,
24503+ // safe, scalar boolean logic operations, the reduction operand must be
24504+ // frozen.
24505+ FixBoolLogicalOps(StableRdxVal1, StableRdxVal2, InstVals[I].first,
24506+ RedOp, InitStep);
24507+ Value *ExtraRed = createOp(Builder, RdxKind, StableRdxVal1,
24508+ StableRdxVal2, "op.rdx", ReductionOps);
24509+ ExtraReds[I / 2] = std::make_pair(InstVals[I].first, ExtraRed);
24510+ }
24511+ if (Sz % 2 == 1)
24512+ ExtraReds[Sz / 2] = InstVals.back();
24513+ return ExtraReds;
24514+ };
24515+ SmallVector<std::pair<Instruction *, Value *>> ExtraReductions;
24516+ ExtraReductions.emplace_back(cast<Instruction>(ReductionRoot),
24517+ VectorizedTree);
24518+ SmallPtrSet<Value *, 8> Visited;
24519+ for (ArrayRef<Value *> Candidates : ReducedVals) {
24520+ for (Value *RdxVal : Candidates) {
24521+ if (!Visited.insert(RdxVal).second)
24522+ continue;
24523+ unsigned NumOps = VectorizedVals.lookup(RdxVal);
24524+ for (Instruction *RedOp :
24525+ ArrayRef(ReducedValsToOps.at(RdxVal)).drop_back(NumOps))
24526+ ExtraReductions.emplace_back(RedOp, RdxVal);
2453224527 }
24533- VectorizedTree = ExtraReductions.front().second;
24528+ }
24529+ // Iterate through all not-vectorized reduction values/extra arguments.
24530+ bool InitStep = true;
24531+ while (ExtraReductions.size() > 1) {
24532+ SmallVector<std::pair<Instruction *, Value *>> NewReds =
24533+ FinalGen(ExtraReductions, InitStep);
24534+ ExtraReductions.swap(NewReds);
24535+ InitStep = false;
24536+ }
24537+ VectorizedTree = ExtraReductions.front().second;
2453424538
24535- ReductionRoot->replaceAllUsesWith(VectorizedTree);
24539+ ReductionRoot->replaceAllUsesWith(VectorizedTree);
2453624540
24537- // The original scalar reduction is expected to have no remaining
24538- // uses outside the reduction tree itself. Assert that we got this
24539- // correct, replace internal uses with undef, and mark for eventual
24540- // deletion.
24541+ // The original scalar reduction is expected to have no remaining
24542+ // uses outside the reduction tree itself. Assert that we got this
24543+ // correct, replace internal uses with undef, and mark for eventual
24544+ // deletion.
2454124545#ifndef NDEBUG
24542- SmallPtrSet<Value *, 4> IgnoreSet;
24543- for (ArrayRef<Value *> RdxOps : ReductionOps)
24544- IgnoreSet.insert_range(RdxOps);
24546+ SmallPtrSet<Value *, 4> IgnoreSet;
24547+ for (ArrayRef<Value *> RdxOps : ReductionOps)
24548+ IgnoreSet.insert_range(RdxOps);
2454524549#endif
24546- for (ArrayRef<Value *> RdxOps : ReductionOps) {
24547- for (Value *Ignore : RdxOps) {
24548- if (!Ignore)
24549- continue;
24550+ for (ArrayRef<Value *> RdxOps : ReductionOps) {
24551+ for (Value *Ignore : RdxOps) {
24552+ if (!Ignore)
24553+ continue;
2455024554#ifndef NDEBUG
24551- for (auto *U : Ignore->users()) {
24552- assert(IgnoreSet.count(U) &&
24553- "All users must be either in the reduction ops list.");
24554- }
24555+ for (auto *U : Ignore->users()) {
24556+ assert(IgnoreSet.count(U) &&
24557+ "All users must be either in the reduction ops list.");
24558+ }
2455524559#endif
24556- if (!Ignore->use_empty()) {
24557- Value *P = PoisonValue::get(Ignore->getType());
24558- Ignore->replaceAllUsesWith(P);
24559- }
24560+ if (!Ignore->use_empty()) {
24561+ Value *P = PoisonValue::get(Ignore->getType());
24562+ Ignore->replaceAllUsesWith(P);
2456024563 }
24561- V.removeInstructionsAndOperands(RdxOps, VectorValuesAndScales);
2456224564 }
24563- } else if (!CheckForReusedReductionOps) {
24564- for (ReductionOpsType &RdxOps : ReductionOps)
24565- for (Value *RdxOp : RdxOps)
24566- V.analyzedReductionRoot(cast<Instruction>(RdxOp));
24565+ V.removeInstructionsAndOperands(RdxOps, VectorValuesAndScales);
2456724566 }
2456824567 return VectorizedTree;
2456924568 }
0 commit comments