Skip to content

Commit fa240f2

Browse files
[RISCV] Update SiFive7's scheduling model on mask and data movement instructions (#160155)
Vector-to-scalar movement instructions, as well as mask instructions like vcpop and vfirst, should have a higher latency and occupancy on SiFive7.

Co-authored-by: Michael Maitland <[email protected]>
1 parent c4f86c4 commit fa240f2

File tree

3 files changed

+887
-4
lines changed

3 files changed

+887
-4
lines changed

llvm/lib/Target/RISCV/RISCVSchedSiFive7.td

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -929,16 +929,16 @@ multiclass SiFive7WriteResBase<int VLEN,
929929
}
930930

931931
// 15. Vector Mask Instructions
932+
// Simple mask logical
932933
foreach mx = SchedMxList in {
933934
defvar Cycles = SiFive7GetCyclesVMask<mx>.c;
934935
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
935936
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
936937
defm : LMULWriteResMX<"WriteVMALUV", [VCQ, VA1], mx, IsWorstCase>;
937-
defm : LMULWriteResMX<"WriteVMPopV", [VCQ, VA1], mx, IsWorstCase>;
938-
defm : LMULWriteResMX<"WriteVMFFSV", [VCQ, VA1], mx, IsWorstCase>;
939938
defm : LMULWriteResMX<"WriteVMSFSV", [VCQ, VA1], mx, IsWorstCase>;
940939
}
941940
}
941+
// Simple mask logical used in series
942942
foreach mx = SchedMxList in {
943943
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
944944
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
@@ -947,13 +947,23 @@ multiclass SiFive7WriteResBase<int VLEN,
947947
defm : LMULWriteResMX<"WriteVIdxV", [VCQ, VA1], mx, IsWorstCase>;
948948
}
949949
}
950+
// Mask reduction
951+
foreach mx = SchedMxList in {
952+
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
953+
let Latency = 11, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 3)] in {
954+
defm "" : LMULWriteResMX<"WriteVMFFSV", [VCQ, VA1], mx, IsWorstCase>;
955+
defm "" : LMULWriteResMX<"WriteVMPopV", [VCQ, VA1], mx, IsWorstCase>;
956+
}
957+
}
950958

951959
// 16. Vector Permutation Instructions
960+
let Latency = 11, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 3)] in {
961+
def : WriteRes<WriteVMovXS, [VCQ, VA1]>;
962+
def : WriteRes<WriteVMovFS, [VCQ, VA1]>;
963+
}
952964
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 1)] in {
953965
def : WriteRes<WriteVMovSX, [VCQ, VA1OrVA2]>;
954-
def : WriteRes<WriteVMovXS, [VCQ, VA1]>;
955966
def : WriteRes<WriteVMovSF, [VCQ, VA1OrVA2]>;
956-
def : WriteRes<WriteVMovFS, [VCQ, VA1]>;
957967
}
958968
foreach mx = SchedMxList in {
959969
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2+
# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -iterations=1 -instruction-tables=full < %s | FileCheck %s
3+
4+
vsetvli zero, zero, e32, m1, ta, ma
5+
6+
vmslt.vv v0, v4, v20
7+
vmsle.vv v8, v4, v20
8+
vmsgt.vv v8, v20, v4
9+
vmsge.vv v8, v20, v4
10+
vmseq.vv v8, v4, v20
11+
vmsne.vv v8, v4, v20
12+
vmsltu.vv v8, v4, v20
13+
vmsleu.vv v8, v4, v20
14+
vmsgtu.vv v8, v20, v4
15+
vmsgeu.vv v8, v20, v4
16+
17+
vmflt.vv v0, v4, v20
18+
vmfle.vv v8, v4, v20
19+
vmfgt.vv v8, v20, v4
20+
vmfge.vv v8, v20, v4
21+
vmfeq.vv v8, v4, v20
22+
vmfne.vv v8, v4, v20
23+
24+
vmadc.vv v8, v4, v20
25+
vmsbc.vv v8, v4, v20
26+
27+
vfirst.m a2, v4
28+
vpopc.m a2, v4
29+
30+
viota.m v8, v4
31+
32+
vmsbf.m v8, v4
33+
vmsif.m v8, v4
34+
vmsof.m v8, v4
35+
36+
# CHECK: Resources:
37+
# CHECK-NEXT: [0] - VLEN512SiFive7FDiv:1
38+
# CHECK-NEXT: [1] - VLEN512SiFive7IDiv:1
39+
# CHECK-NEXT: [2] - VLEN512SiFive7PipeA:1
40+
# CHECK-NEXT: [3] - VLEN512SiFive7PipeAB:2 VLEN512SiFive7PipeA, VLEN512SiFive7PipeB
41+
# CHECK-NEXT: [4] - VLEN512SiFive7PipeB:1
42+
# CHECK-NEXT: [5] - VLEN512SiFive7VA:1
43+
# CHECK-NEXT: [6] - VLEN512SiFive7VCQ:1
44+
# CHECK-NEXT: [7] - VLEN512SiFive7VL:1
45+
# CHECK-NEXT: [8] - VLEN512SiFive7VS:1
46+
47+
# CHECK: Instruction Info:
48+
# CHECK-NEXT: [1]: #uOps
49+
# CHECK-NEXT: [2]: Latency
50+
# CHECK-NEXT: [3]: RThroughput
51+
# CHECK-NEXT: [4]: MayLoad
52+
# CHECK-NEXT: [5]: MayStore
53+
# CHECK-NEXT: [6]: HasSideEffects (U)
54+
# CHECK-NEXT: [7]: Bypass Latency
55+
# CHECK-NEXT: [8]: Resources (<Name> | <Name>[<ReleaseAtCycle>] | <Name>[<AcquireAtCycle>,<ReleaseAtCycle])
56+
# CHECK-NEXT: [9]: LLVM Opcode Name
57+
58+
# CHECK: [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
59+
# CHECK-NEXT: 1 3 1.00 U 1 VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB VSETVLI vsetvli zero, zero, e32, m1, ta, ma
60+
# CHECK-NEXT: 1 5 2.00 5 VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ VMSLT_VV vmslt.vv v0, v4, v20
61+
# CHECK-NEXT: 1 5 2.00 5 VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ VMSLE_VV vmsle.vv v8, v4, v20
62+
# CHECK-NEXT: 1 5 2.00 5 VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ VMSLT_VV vmslt.vv v8, v4, v20
63+
# CHECK-NEXT: 1 5 2.00 5 VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ VMSLE_VV vmsle.vv v8, v4, v20
64+
# CHECK-NEXT: 1 5 2.00 5 VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ VMSEQ_VV vmseq.vv v8, v4, v20
65+
# CHECK-NEXT: 1 5 2.00 5 VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ VMSNE_VV vmsne.vv v8, v4, v20
66+
# CHECK-NEXT: 1 5 2.00 5 VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ VMSLTU_VV vmsltu.vv v8, v4, v20
67+
# CHECK-NEXT: 1 5 2.00 5 VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ VMSLEU_VV vmsleu.vv v8, v4, v20
68+
# CHECK-NEXT: 1 5 2.00 5 VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ VMSLTU_VV vmsltu.vv v8, v4, v20
69+
# CHECK-NEXT: 1 5 2.00 5 VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ VMSLEU_VV vmsleu.vv v8, v4, v20
70+
# CHECK-NEXT: 1 5 2.00 5 VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ VMFLT_VV vmflt.vv v0, v4, v20
71+
# CHECK-NEXT: 1 5 2.00 5 VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ VMFLE_VV vmfle.vv v8, v4, v20
72+
# CHECK-NEXT: 1 5 2.00 5 VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ VMFLT_VV vmflt.vv v8, v4, v20
73+
# CHECK-NEXT: 1 5 2.00 5 VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ VMFLE_VV vmfle.vv v8, v4, v20
74+
# CHECK-NEXT: 1 5 2.00 5 VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ VMFEQ_VV vmfeq.vv v8, v4, v20
75+
# CHECK-NEXT: 1 5 2.00 5 VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ VMFNE_VV vmfne.vv v8, v4, v20
76+
# CHECK-NEXT: 1 4 2.00 4 VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ VMADC_VV vmadc.vv v8, v4, v20
77+
# CHECK-NEXT: 1 4 2.00 4 VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ VMSBC_VV vmsbc.vv v8, v4, v20
78+
# CHECK-NEXT: 1 11 3.00 11 VLEN512SiFive7VA[1,4],VLEN512SiFive7VCQ VFIRST_M vfirst.m a2, v4
79+
# CHECK-NEXT: 1 11 3.00 11 VLEN512SiFive7VA[1,4],VLEN512SiFive7VCQ VCPOP_M vcpop.m a2, v4
80+
# CHECK-NEXT: 1 4 2.00 4 VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ VIOTA_M viota.m v8, v4
81+
# CHECK-NEXT: 1 4 1.00 4 VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ VMSBF_M vmsbf.m v8, v4
82+
# CHECK-NEXT: 1 4 1.00 4 VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ VMSIF_M vmsif.m v8, v4
83+
# CHECK-NEXT: 1 4 1.00 4 VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ VMSOF_M vmsof.m v8, v4
84+
85+
# CHECK: Resources:
86+
# CHECK-NEXT: [0] - VLEN512SiFive7FDiv
87+
# CHECK-NEXT: [1] - VLEN512SiFive7IDiv
88+
# CHECK-NEXT: [2] - VLEN512SiFive7PipeA
89+
# CHECK-NEXT: [3] - VLEN512SiFive7PipeB
90+
# CHECK-NEXT: [4] - VLEN512SiFive7VA
91+
# CHECK-NEXT: [5] - VLEN512SiFive7VCQ
92+
# CHECK-NEXT: [6] - VLEN512SiFive7VL
93+
# CHECK-NEXT: [7] - VLEN512SiFive7VS
94+
95+
# CHECK: Resource pressure per iteration:
96+
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
97+
# CHECK-NEXT: - - 1.00 - 71.00 24.00 - -
98+
99+
# CHECK: Resource pressure by instruction:
100+
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
101+
# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e32, m1, ta, ma
102+
# CHECK-NEXT: - - - - 3.00 1.00 - - vmslt.vv v0, v4, v20
103+
# CHECK-NEXT: - - - - 3.00 1.00 - - vmsle.vv v8, v4, v20
104+
# CHECK-NEXT: - - - - 3.00 1.00 - - vmslt.vv v8, v4, v20
105+
# CHECK-NEXT: - - - - 3.00 1.00 - - vmsle.vv v8, v4, v20
106+
# CHECK-NEXT: - - - - 3.00 1.00 - - vmseq.vv v8, v4, v20
107+
# CHECK-NEXT: - - - - 3.00 1.00 - - vmsne.vv v8, v4, v20
108+
# CHECK-NEXT: - - - - 3.00 1.00 - - vmsltu.vv v8, v4, v20
109+
# CHECK-NEXT: - - - - 3.00 1.00 - - vmsleu.vv v8, v4, v20
110+
# CHECK-NEXT: - - - - 3.00 1.00 - - vmsltu.vv v8, v4, v20
111+
# CHECK-NEXT: - - - - 3.00 1.00 - - vmsleu.vv v8, v4, v20
112+
# CHECK-NEXT: - - - - 3.00 1.00 - - vmflt.vv v0, v4, v20
113+
# CHECK-NEXT: - - - - 3.00 1.00 - - vmfle.vv v8, v4, v20
114+
# CHECK-NEXT: - - - - 3.00 1.00 - - vmflt.vv v8, v4, v20
115+
# CHECK-NEXT: - - - - 3.00 1.00 - - vmfle.vv v8, v4, v20
116+
# CHECK-NEXT: - - - - 3.00 1.00 - - vmfeq.vv v8, v4, v20
117+
# CHECK-NEXT: - - - - 3.00 1.00 - - vmfne.vv v8, v4, v20
118+
# CHECK-NEXT: - - - - 3.00 1.00 - - vmadc.vv v8, v4, v20
119+
# CHECK-NEXT: - - - - 3.00 1.00 - - vmsbc.vv v8, v4, v20
120+
# CHECK-NEXT: - - - - 4.00 1.00 - - vfirst.m a2, v4
121+
# CHECK-NEXT: - - - - 4.00 1.00 - - vcpop.m a2, v4
122+
# CHECK-NEXT: - - - - 3.00 1.00 - - viota.m v8, v4
123+
# CHECK-NEXT: - - - - 2.00 1.00 - - vmsbf.m v8, v4
124+
# CHECK-NEXT: - - - - 2.00 1.00 - - vmsif.m v8, v4
125+
# CHECK-NEXT: - - - - 2.00 1.00 - - vmsof.m v8, v4

0 commit comments

Comments
 (0)