Skip to content

Commit a2db388

Browse files
committed
[CostModel][X86] Improve ISD::CTTZ costs accounting for BSF/TZCNT implementations
1 parent ec3218d commit a2db388

File tree

3 files changed

+244
-172
lines changed

3 files changed

+244
-172
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2133,6 +2133,14 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
21332133
{ ISD::FSQRT, MVT::f32, 28 }, // Pentium III from http://www.agner.org/
21342134
{ ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/
21352135
};
2136+
static const CostTblEntry BMI64CostTbl[] = { // 64-bit targets
2137+
{ ISD::CTTZ, MVT::i64, 1 },
2138+
};
2139+
static const CostTblEntry BMI32CostTbl[] = { // 32 or 64-bit targets
2140+
{ ISD::CTTZ, MVT::i32, 1 },
2141+
{ ISD::CTTZ, MVT::i16, 1 },
2142+
{ ISD::CTTZ, MVT::i8, 1 },
2143+
};
21362144
static const CostTblEntry LZCNT64CostTbl[] = { // 64-bit targets
21372145
{ ISD::CTLZ, MVT::i64, 1 },
21382146
};
@@ -2152,6 +2160,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
21522160
static const CostTblEntry X64CostTbl[] = { // 64-bit targets
21532161
{ ISD::BITREVERSE, MVT::i64, 14 },
21542162
{ ISD::CTLZ, MVT::i64, 4 }, // BSR+XOR or BSR+XOR+CMOV
2163+
{ ISD::CTTZ, MVT::i64, 3 }, // TEST+BSF+CMOV/BRANCH
21552164
{ ISD::CTPOP, MVT::i64, 10 },
21562165
{ ISD::SADDO, MVT::i64, 1 },
21572166
{ ISD::UADDO, MVT::i64, 1 },
@@ -2163,6 +2172,9 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
21632172
{ ISD::CTLZ, MVT::i32, 4 }, // BSR+XOR or BSR+XOR+CMOV
21642173
{ ISD::CTLZ, MVT::i16, 4 }, // BSR+XOR or BSR+XOR+CMOV
21652174
{ ISD::CTLZ, MVT::i8, 4 }, // BSR+XOR or BSR+XOR+CMOV
2175+
{ ISD::CTTZ, MVT::i32, 3 }, // TEST+BSF+CMOV/BRANCH
2176+
{ ISD::CTTZ, MVT::i16, 3 }, // TEST+BSF+CMOV/BRANCH
2177+
{ ISD::CTTZ, MVT::i8, 3 }, // TEST+BSF+CMOV/BRANCH
21662178
{ ISD::CTPOP, MVT::i32, 8 },
21672179
{ ISD::CTPOP, MVT::i16, 9 },
21682180
{ ISD::CTPOP, MVT::i8, 7 },
@@ -2282,6 +2294,15 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
22822294
if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
22832295
return LT.first * Entry->Cost;
22842296

2297+
if (ST->hasBMI()) {
2298+
if (ST->is64Bit())
2299+
if (const auto *Entry = CostTableLookup(BMI64CostTbl, ISD, MTy))
2300+
return LT.first * Entry->Cost;
2301+
2302+
if (const auto *Entry = CostTableLookup(BMI32CostTbl, ISD, MTy))
2303+
return LT.first * Entry->Cost;
2304+
}
2305+
22852306
if (ST->hasLZCNT()) {
22862307
if (ST->is64Bit())
22872308
if (const auto *Entry = CostTableLookup(LZCNT64CostTbl, ISD, MTy))

llvm/test/Analysis/CostModel/X86/cttz.ll

Lines changed: 63 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,11 @@
11
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2-
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=CHECK,SSE,SSE2
3-
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=CHECK,SSE,SSE42
4-
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=CHECK,AVX,AVX1
5-
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=CHECK,AVX,AVX2
6-
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s -check-prefixes=CHECK,AVX512,AVX512F
7-
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=CHECK,AVX512,AVX512BW
2+
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=-bmi,+sse2 | FileCheck %s -check-prefixes=CHECK,SSE,SSE2,NOBMI
3+
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+sse2 | FileCheck %s -check-prefixes=CHECK,SSE,SSE2,BMI
4+
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+sse4.2 | FileCheck %s -check-prefixes=CHECK,BMI,SSE,SSE42
5+
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx | FileCheck %s -check-prefixes=CHECK,BMI,AVX,AVX1
6+
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx2 | FileCheck %s -check-prefixes=CHECK,BMI,AVX,AVX2
7+
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx512f | FileCheck %s -check-prefixes=CHECK,BMI,AVX512,AVX512F
8+
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=CHECK,BMI,AVX512,AVX512BW
89

910
; Verify the cost of scalar trailing zero count instructions.
1011

@@ -14,72 +15,104 @@ declare i16 @llvm.cttz.i16(i16, i1)
1415
declare i8 @llvm.cttz.i8(i8, i1)
1516

1617
define i64 @var_cttz_i64(i64 %a) {
17-
; CHECK-LABEL: 'var_cttz_i64'
18-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
19-
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %cttz
18+
; NOBMI-LABEL: 'var_cttz_i64'
19+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
20+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %cttz
21+
;
22+
; BMI-LABEL: 'var_cttz_i64'
23+
; BMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
24+
; BMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %cttz
2025
;
2126
%cttz = call i64 @llvm.cttz.i64(i64 %a, i1 0)
2227
ret i64 %cttz
2328
}
2429

2530
define i64 @var_cttz_i64u(i64 %a) {
26-
; CHECK-LABEL: 'var_cttz_i64u'
27-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 true)
28-
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %cttz
31+
; NOBMI-LABEL: 'var_cttz_i64u'
32+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 true)
33+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %cttz
34+
;
35+
; BMI-LABEL: 'var_cttz_i64u'
36+
; BMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 true)
37+
; BMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %cttz
2938
;
3039
%cttz = call i64 @llvm.cttz.i64(i64 %a, i1 1)
3140
ret i64 %cttz
3241
}
3342

3443
define i32 @var_cttz_i32(i32 %a) {
35-
; CHECK-LABEL: 'var_cttz_i32'
36-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
37-
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %cttz
44+
; NOBMI-LABEL: 'var_cttz_i32'
45+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
46+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %cttz
47+
;
48+
; BMI-LABEL: 'var_cttz_i32'
49+
; BMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
50+
; BMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %cttz
3851
;
3952
%cttz = call i32 @llvm.cttz.i32(i32 %a, i1 0)
4053
ret i32 %cttz
4154
}
4255

4356
define i32 @var_cttz_i32u(i32 %a) {
44-
; CHECK-LABEL: 'var_cttz_i32u'
45-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 true)
46-
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %cttz
57+
; NOBMI-LABEL: 'var_cttz_i32u'
58+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 true)
59+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %cttz
60+
;
61+
; BMI-LABEL: 'var_cttz_i32u'
62+
; BMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 true)
63+
; BMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %cttz
4764
;
4865
%cttz = call i32 @llvm.cttz.i32(i32 %a, i1 1)
4966
ret i32 %cttz
5067
}
5168

5269
define i16 @var_cttz_i16(i16 %a) {
53-
; CHECK-LABEL: 'var_cttz_i16'
54-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 false)
55-
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %cttz
70+
; NOBMI-LABEL: 'var_cttz_i16'
71+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 false)
72+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %cttz
73+
;
74+
; BMI-LABEL: 'var_cttz_i16'
75+
; BMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 false)
76+
; BMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %cttz
5677
;
5778
%cttz = call i16 @llvm.cttz.i16(i16 %a, i1 0)
5879
ret i16 %cttz
5980
}
6081

6182
define i16 @var_cttz_i16u(i16 %a) {
62-
; CHECK-LABEL: 'var_cttz_i16u'
63-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true)
64-
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %cttz
83+
; NOBMI-LABEL: 'var_cttz_i16u'
84+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true)
85+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %cttz
86+
;
87+
; BMI-LABEL: 'var_cttz_i16u'
88+
; BMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true)
89+
; BMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %cttz
6590
;
6691
%cttz = call i16 @llvm.cttz.i16(i16 %a, i1 1)
6792
ret i16 %cttz
6893
}
6994

7095
define i8 @var_cttz_i8(i8 %a) {
71-
; CHECK-LABEL: 'var_cttz_i8'
72-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 false)
73-
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %cttz
96+
; NOBMI-LABEL: 'var_cttz_i8'
97+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 false)
98+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %cttz
99+
;
100+
; BMI-LABEL: 'var_cttz_i8'
101+
; BMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 false)
102+
; BMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %cttz
74103
;
75104
%cttz = call i8 @llvm.cttz.i8(i8 %a, i1 0)
76105
ret i8 %cttz
77106
}
78107

79108
define i8 @var_cttz_i8u(i8 %a) {
80-
; CHECK-LABEL: 'var_cttz_i8u'
81-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 true)
82-
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %cttz
109+
; NOBMI-LABEL: 'var_cttz_i8u'
110+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 true)
111+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %cttz
112+
;
113+
; BMI-LABEL: 'var_cttz_i8u'
114+
; BMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 true)
115+
; BMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %cttz
83116
;
84117
%cttz = call i8 @llvm.cttz.i8(i8 %a, i1 1)
85118
ret i8 %cttz

0 commit comments

Comments
 (0)