|
40 | 40 | #include <cassert> |
41 | 41 |
|
42 | 42 | using namespace llvm; |
43 | | -using namespace llvm::VPlanPatternMatch; |
44 | 43 |
|
45 | 44 | using VectorParts = SmallVector<Value *, 2>; |
46 | 45 |
|
@@ -304,6 +303,7 @@ VPPartialReductionRecipe::computeCost(ElementCount VF, |
304 | 303 | VPRecipeBase *OpR = Op->getDefiningRecipe(); |
305 | 304 |
|
306 | 305 | // If the partial reduction is predicated, a select will be operand 0 |
| 306 | + using namespace llvm::VPlanPatternMatch; |
307 | 307 | if (match(getOperand(1), m_Select(m_VPValue(), m_VPValue(Op), m_VPValue()))) { |
308 | 308 | OpR = Op->getDefiningRecipe(); |
309 | 309 | } |
@@ -1963,6 +1963,7 @@ InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF, |
1963 | 1963 | Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); |
1964 | 1964 |
|
1965 | 1965 | VPValue *Op0, *Op1; |
| 1966 | + using namespace llvm::VPlanPatternMatch; |
1966 | 1967 | if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 && |
1967 | 1968 | (match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) || |
1968 | 1969 | match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) { |
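
Aside (illustrative, not part of the patch): the hunks above drop the file-scope using namespace llvm::VPlanPatternMatch; and re-introduce it inside the individual computeCost functions that actually call match(). A minimal standalone sketch of the scoping effect, using a hypothetical "patterns" namespace rather than the real VPlanPatternMatch helpers:

#include <cstdio>

namespace patterns {                   // hypothetical stand-in for VPlanPatternMatch
inline bool m_AnyValue(int) { return true; }
} // namespace patterns

// The using-directive lives inside the function body, so m_AnyValue is visible
// here but is not injected into every other function in the file.
bool isPredicated(int V) {
  using namespace patterns;
  return m_AnyValue(V);
}

int main() {
  std::printf("%d\n", isPredicated(42)); // prints 1
  return 0;
}
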
@@ -3110,62 +3111,6 @@ bool VPReplicateRecipe::shouldPack() const { |
3110 | 3111 | }); |
3111 | 3112 | } |
3112 | 3113 |
|
3113 | | -/// Returns true if \p Ptr is a pointer computation for which the legacy cost |
3114 | | -/// model computes a SCEV expression when computing the address cost. |
3115 | | -static bool shouldUseAddressAccessSCEV(const VPValue *Ptr) { |
3116 | | - auto *PtrR = Ptr->getDefiningRecipe(); |
3117 | | - if (!PtrR || !((isa<VPReplicateRecipe>(PtrR) && |
3118 | | - cast<VPReplicateRecipe>(PtrR)->getOpcode() == |
3119 | | - Instruction::GetElementPtr) || |
3120 | | - isa<VPWidenGEPRecipe>(PtrR) || |
3121 | | - match(Ptr, m_GetElementPtr(m_VPValue(), m_VPValue())))) |
3122 | | - return false; |
3123 | | - |
3124 | | - // We are looking for a GEP where all indices are either loop invariant or |
3125 | | - // inductions. |
3126 | | - for (VPValue *Opd : drop_begin(PtrR->operands())) { |
3127 | | - if (!Opd->isDefinedOutsideLoopRegions() && |
3128 | | - !isa<VPScalarIVStepsRecipe, VPWidenIntOrFpInductionRecipe>(Opd)) |
3129 | | - return false; |
3130 | | - } |
3131 | | - |
3132 | | - return true; |
3133 | | -} |
3134 | | - |
3135 | | -/// Returns true if \p V is used as part of the address of another load or |
3136 | | -/// store. |
3137 | | -static bool isUsedByLoadStoreAddress(const VPUser *V) { |
3138 | | - SmallPtrSet<const VPUser *, 4> Seen; |
3139 | | - SmallVector<const VPUser *> WorkList = {V}; |
3140 | | - |
3141 | | - while (!WorkList.empty()) { |
3142 | | - auto *Cur = dyn_cast<VPSingleDefRecipe>(WorkList.pop_back_val()); |
3143 | | - if (!Cur || !Seen.insert(Cur).second) |
3144 | | - continue; |
3145 | | - |
3146 | | - for (VPUser *U : Cur->users()) { |
3147 | | - if (auto *InterleaveR = dyn_cast<VPInterleaveBase>(U)) |
3148 | | - if (InterleaveR->getAddr() == Cur) |
3149 | | - return true; |
3150 | | - if (auto *RepR = dyn_cast<VPReplicateRecipe>(U)) { |
3151 | | - if (RepR->getOpcode() == Instruction::Load && |
3152 | | - RepR->getOperand(0) == Cur) |
3153 | | - return true; |
3154 | | - if (RepR->getOpcode() == Instruction::Store && |
3155 | | - RepR->getOperand(1) == Cur) |
3156 | | - return true; |
3157 | | - } |
3158 | | - if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U)) { |
3159 | | - if (MemR->getAddr() == Cur && MemR->isConsecutive()) |
3160 | | - return true; |
3161 | | - } |
3162 | | - } |
3163 | | - |
3164 | | - append_range(WorkList, cast<VPSingleDefRecipe>(Cur)->users()); |
3165 | | - } |
3166 | | - return false; |
3167 | | -} |
3168 | | - |
3169 | 3114 | InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, |
3170 | 3115 | VPCostContext &Ctx) const { |
3171 | 3116 | Instruction *UI = cast<Instruction>(getUnderlyingValue()); |
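
Aside (illustrative, not part of the patch): the deleted isUsedByLoadStoreAddress helper above walked the transitive users of a recipe with a worklist plus a visited set. A minimal sketch of that traversal pattern over a toy Node type, which is an assumption here and not the real VPlan recipe classes:

#include <cstdio>
#include <unordered_set>
#include <vector>

struct Node {                          // hypothetical stand-in for a VPlan recipe
  bool IsAddressUse = false;           // marks a user that feeds a load/store address
  std::vector<Node *> Users;
};

static bool isUsedAsAddress(Node *Start) {
  std::unordered_set<Node *> Seen;
  std::vector<Node *> WorkList{Start};
  while (!WorkList.empty()) {
    Node *Cur = WorkList.back();
    WorkList.pop_back();
    if (!Seen.insert(Cur).second)
      continue;                        // already visited; avoids cycles
    for (Node *U : Cur->Users) {
      if (U->IsAddressUse)
        return true;                   // some transitive user is an address computation
      WorkList.push_back(U);           // otherwise keep walking its users
    }
  }
  return false;
}

int main() {
  Node Addr, Mid, Root;
  Addr.IsAddressUse = true;
  Mid.Users.push_back(&Addr);
  Root.Users.push_back(&Mid);
  std::printf("%d\n", isUsedAsAddress(&Root)); // prints 1
  return 0;
}
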
@@ -3273,58 +3218,21 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, |
3273 | 3218 | } |
3274 | 3219 | case Instruction::Load: |
3275 | 3220 | case Instruction::Store: { |
3276 | | - if (VF.isScalable() && !isSingleScalar()) |
3277 | | - return InstructionCost::getInvalid(); |
3278 | | - |
| 3221 | + if (isSingleScalar()) { |
| 3222 | + bool IsLoad = UI->getOpcode() == Instruction::Load; |
| 3223 | + Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0)); |
| 3224 | + Type *ScalarPtrTy = Ctx.Types.inferScalarType(getOperand(IsLoad ? 0 : 1)); |
| 3225 | + const Align Alignment = getLoadStoreAlignment(UI); |
| 3226 | + unsigned AS = getLoadStoreAddressSpace(UI); |
| 3227 | + TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0)); |
| 3228 | + InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost( |
| 3229 | + UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo, UI); |
| 3230 | + return ScalarMemOpCost + Ctx.TTI.getAddressComputationCost( |
| 3231 | + ScalarPtrTy, nullptr, nullptr, Ctx.CostKind); |
| 3232 | + } |
3279 | 3233 | // TODO: See getMemInstScalarizationCost for how to handle replicating and |
3280 | 3234 | // predicated cases. |
3281 | | - const VPRegionBlock *ParentRegion = getParent()->getParent(); |
3282 | | - if (ParentRegion && ParentRegion->isReplicator()) |
3283 | | - break; |
3284 | | - |
3285 | | - bool IsLoad = UI->getOpcode() == Instruction::Load; |
3286 | | - const VPValue *PtrOp = getOperand(!IsLoad); |
3287 | | - // TODO: Handle cases where we need to pass a SCEV to |
3288 | | - // getAddressComputationCost. |
3289 | | - if (shouldUseAddressAccessSCEV(PtrOp)) |
3290 | | - break; |
3291 | | - |
3292 | | - Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0)); |
3293 | | - Type *ScalarPtrTy = Ctx.Types.inferScalarType(PtrOp); |
3294 | | - const Align Alignment = getLoadStoreAlignment(UI); |
3295 | | - unsigned AS = getLoadStoreAddressSpace(UI); |
3296 | | - TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0)); |
3297 | | - InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost( |
3298 | | - UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo); |
3299 | | - |
3300 | | - Type *PtrTy = isSingleScalar() ? ScalarPtrTy : toVectorTy(ScalarPtrTy, VF); |
3301 | | - |
3302 | | - InstructionCost ScalarCost = |
3303 | | - ScalarMemOpCost + Ctx.TTI.getAddressComputationCost( |
3304 | | - PtrTy, &Ctx.SE, nullptr, Ctx.CostKind); |
3305 | | - if (isSingleScalar()) |
3306 | | - return ScalarCost; |
3307 | | - |
3308 | | - SmallVector<const VPValue *> OpsToScalarize; |
3309 | | - Type *ResultTy = Type::getVoidTy(PtrTy->getContext()); |
3310 | | - // Set ResultTy and OpsToScalarize, if scalarization is needed. Currently we |
3311 | | - // don't assign scalarization overhead in general, if the target prefers |
3312 | | - // vectorized addressing or the loaded value is used as part of an address |
3313 | | - // of another load or store. |
3314 | | - bool PreferVectorizedAddressing = Ctx.TTI.prefersVectorizedAddressing(); |
3315 | | - if (PreferVectorizedAddressing || !isUsedByLoadStoreAddress(this)) { |
3316 | | - bool EfficientVectorLoadStore = |
3317 | | - Ctx.TTI.supportsEfficientVectorElementLoadStore(); |
3318 | | - if (!(IsLoad && !PreferVectorizedAddressing) && |
3319 | | - !(!IsLoad && EfficientVectorLoadStore)) |
3320 | | - append_range(OpsToScalarize, operands()); |
3321 | | - |
3322 | | - if (!EfficientVectorLoadStore) |
3323 | | - ResultTy = Ctx.Types.inferScalarType(this); |
3324 | | - } |
3325 | | - |
3326 | | - return (ScalarCost * VF.getFixedValue()) + |
3327 | | - Ctx.getScalarizationOverhead(ResultTy, OpsToScalarize, VF, true); |
| 3235 | + break; |
3328 | 3236 | } |
3329 | 3237 | } |
3330 | 3238 |
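
Aside (illustrative, not part of the patch): for the single-scalar case, the rewritten block above returns one scalar memory-op cost plus one scalar address-computation cost, with no per-lane replication or scalarization overhead. A toy sketch of that arithmetic with invented numbers; the real values come from the target's TTI hooks:

#include <cstdio>

// Hypothetical stand-in for the two TTI cost queries; the numbers are made up
// purely to show how the components combine for a single-scalar access.
struct ToyCostModel {
  int memoryOpCost(bool IsLoad) const { return IsLoad ? 4 : 2; }
  int addressComputationCost() const { return 1; }
};

int main() {
  ToyCostModel TTI;
  bool IsLoad = true;
  int SingleScalarCost = TTI.memoryOpCost(IsLoad) + TTI.addressComputationCost();
  std::printf("single-scalar load cost = %d\n", SingleScalarCost); // 4 + 1 = 5
  return 0;
}
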
|
|