diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 2a2a46a19d7c1..88ca37ea12cf5 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1458,16 +1458,73 @@ InstructionCost ARMTTIImpl::getArithmeticInstrCost(
   if (LooksLikeAFreeShift())
     return 0;
 
+  // When targets have both DSP and MVE we find that the compiler will
+  // attempt to vectorize as well as using scalar (S/U)MLAL operations.
+  // This happens for the pattern ext(mul(ext(i16), ext(i16))), where
+  // codegen performs better when only using (S/U)MLAL scalar ops instead
+  // of trying to mix vector ops with (S/U)MLAL ops. We therefore check
+  // whether a mul instruction is used in an (S/U)MLAL pattern and, if so,
+  // report it as free.
+  auto MulInDSPMLALPattern = [&](const Instruction *I, unsigned Opcode,
+                                 Type *Ty) -> bool {
+    if (!ST->hasDSP())
+      return false;
+
+    if (!I)
+      return false;
+
+    if (Opcode != Instruction::Mul)
+      return false;
+
+    if (Ty->isVectorTy())
+      return false;
+
+    auto ValueOpcodesEqual = [](const Value *LHS, const Value *RHS) -> bool {
+      return cast<Instruction>(LHS)->getOpcode() ==
+             cast<Instruction>(RHS)->getOpcode();
+    };
+    auto IsExtInst = [](const Value *V) -> bool {
+      return isa<ZExtInst>(V) || isa<SExtInst>(V);
+    };
+    auto IsExtensionFromHalf = [](const Value *V) -> bool {
+      return cast<Instruction>(V)->getOperand(0)->getType()->isIntegerTy(16);
+    };
+
+    // Both mul operands must be extends of the same kind (sext or zext).
+    auto *BinOp = dyn_cast<BinaryOperator>(I);
+    if (!BinOp)
+      return false;
+    Value *Op0 = BinOp->getOperand(0);
+    Value *Op1 = BinOp->getOperand(1);
+    if (IsExtInst(Op0) && IsExtInst(Op1) && ValueOpcodesEqual(Op0, Op1)) {
+      // We're interested in an i32 mul whose operands extend from i16.
+      if (!I->getType()->isIntegerTy(32) || !IsExtensionFromHalf(Op0) ||
+          !IsExtensionFromHalf(Op1))
+        return false;
+      // Only accept the pattern when every user of the mul is itself a
+      // sext/zext; the width those users extend to is not checked here.
+      for (auto *U : I->users())
+        if (!IsExtInst(U))
+          return false;
+      return true;
+    }
+
+    return false;
+  };
+
+  if (MulInDSPMLALPattern(CxtI, Opcode, Ty))
+    return 0;
+
   // Default to cheap (throughput/size of 1 instruction) but adjust throughput
   // for "multiple beats" potentially needed by MVE instructions.
   int BaseCost = 1;
   if (ST->hasMVEIntegerOps() && Ty->isVectorTy())
     BaseCost = ST->getMVEVectorCostFactor(CostKind);
 
-  // The rest of this mostly follows what is done in BaseT::getArithmeticInstrCost,
-  // without treating floats as more expensive that scalars or increasing the
-  // costs for custom operations. The results is also multiplied by the
-  // MVEVectorCostFactor where appropriate.
+  // The rest of this mostly follows what is done in
+  // BaseT::getArithmeticInstrCost, without treating floats as more expensive
+  // than scalars or increasing the costs for custom operations. The result is
+  // also multiplied by the MVEVectorCostFactor where appropriate.
   if (TLI->isOperationLegalOrCustomOrPromote(ISDOpcode, LT.second))
     return LT.first * BaseCost;
 
diff --git a/llvm/test/Analysis/CostModel/ARM/muls-in-smlal-patterns.ll b/llvm/test/Analysis/CostModel/ARM/muls-in-smlal-patterns.ll
new file mode 100644
index 0000000000000..7de2799d5af9c
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/ARM/muls-in-smlal-patterns.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple thumbv8.1-m.main -mattr=+dsp < %s | FileCheck %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple thumbv8.1-m.main < %s | FileCheck %s --check-prefix=CHECK-NO-DSP
+
+define i64 @test(i16 %a, i16 %b) {
+; CHECK-LABEL: 'test'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms
+;
+; CHECK-NO-DSP-LABEL: 'test'
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms
+;
+  %as = sext i16 %a to i32
+  %bs = sext i16 %b to i32
+  %m = mul i32 %as, %bs
+  %ms = sext i32 %m to i64
+  ret i64 %ms
+}
+
+define i64 @withadd(i16 %a, i16 %b, i64 %c) {
+; CHECK-LABEL: 'withadd'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+;
+; CHECK-NO-DSP-LABEL: 'withadd'
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+;
+  %as = sext i16 %a to i32
+  %bs = sext i16 %b to i32
+  %m = mul i32 %as, %bs
+  %ms = sext i32 %m to i64
+  %r = add i64 %c, %ms
+  ret i64 %r
+}
+
+define i64 @withloads(ptr %pa, ptr %pb, i64 %c) {
+; CHECK-LABEL: 'withloads'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %pa, align 2
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = load i16, ptr %pb, align 2
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = sext i16 %a to i32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bs = sext i16 %b to i32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+;
+; CHECK-NO-DSP-LABEL: 'withloads'
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %pa, align 2
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = load i16, ptr %pb, align 2
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = sext i16 %a to i32
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bs = sext i16 %b to i32
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+;
+  %a = load i16, ptr %pa
+  %b = load i16, ptr %pb
+  %as = sext i16 %a to i32
+  %bs = sext i16 %b to i32
+  %m = mul i32 %as, %bs
+  %ms = sext i32 %m to i64
+  %r = add i64 %c, %ms
+  ret i64 %r
+}
+
+define i64 @different_extend_ops(i16 %a, i16 %b) {
+; CHECK-LABEL: 'different_extend_ops'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = zext i16 %b to i32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms
+;
+; CHECK-NO-DSP-LABEL: 'different_extend_ops'
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = zext i16 %b to i32
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms
+;
+  %as = sext i16 %a to i32
+  %bs = zext i16 %b to i32
+  %m = mul i32 %as, %bs
+  %ms = sext i32 %m to i64
+  ret i64 %ms
+}
diff --git a/llvm/test/Analysis/CostModel/ARM/muls-in-umull-patterns.ll b/llvm/test/Analysis/CostModel/ARM/muls-in-umull-patterns.ll
new file mode 100644
index 0000000000000..521816d13000b
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/ARM/muls-in-umull-patterns.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple thumbv8.1-m.main -mattr=+dsp < %s | FileCheck %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple thumbv8.1-m.main < %s | FileCheck %s --check-prefix=CHECK-NO-DSP
+define i64 @test(i16 %a, i16 %b) {
+; CHECK-LABEL: 'test'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = zext i16 %a to i32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = zext i16 %b to i32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = zext i32 %m to i64
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms
+;
+; CHECK-NO-DSP-LABEL: 'test'
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = zext i16 %a to i32
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = zext i16 %b to i32
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = zext i32 %m to i64
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms
+;
+  %as = zext i16 %a to i32
+  %bs = zext i16 %b to i32
+  %m = mul i32 %as, %bs
+  %ms = zext i32 %m to i64
+  ret i64 %ms
+}
+
+define i64 @withadd(i16 %a, i16 %b, i64 %c) {
+; CHECK-LABEL: 'withadd'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = zext i16 %a to i32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = zext i16 %b to i32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = zext i32 %m to i64
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+;
+; CHECK-NO-DSP-LABEL: 'withadd'
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = zext i16 %a to i32
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = zext i16 %b to i32
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = zext i32 %m to i64
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+;
+  %as = zext i16 %a to i32
+  %bs = zext i16 %b to i32
+  %m = mul i32 %as, %bs
+  %ms = zext i32 %m to i64
+  %r = add i64 %c, %ms
+  ret i64 %r
+}
+
+define i64 @withloads(ptr %pa, ptr %pb, i64 %c) {
+; CHECK-LABEL: 'withloads'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %pa, align 2
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = load i16, ptr %pb, align 2
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = zext i16 %a to i32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bs = zext i16 %b to i32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = zext i32 %m to i64
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+;
+; CHECK-NO-DSP-LABEL: 'withloads'
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %pa, align 2
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = load i16, ptr %pb, align 2
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = zext i16 %a to i32
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bs = zext i16 %b to i32
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = zext i32 %m to i64
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
+; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+;
+  %a = load i16, ptr %pa
+  %b = load i16, ptr %pb
+  %as = zext i16 %a to i32
+  %bs = zext i16 %b to i32
+  %m = mul i32 %as, %bs
+  %ms = zext i32 %m to i64
+  %r = add i64 %c, %ms
+  ret i64 %r
+}