
Commit 6111ff1

[RISCV] Implement shouldFoldMaskToVariableShiftPair (#166159)
Folding a mask to a variable shift pair results in better code size as long as the values are scalars no wider than XLen. Similar to #158069.
1 parent: b5f2001
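
For context, the combine guarded by this hook rewrites the canonical mask form x & (-1 << y) into the shift pair (x >> y) << y, which needs no mask materialization. A minimal stand-alone C++ sketch of that equivalence (plain integers only; mask_form and shift_pair_form are illustrative names, not LLVM code):

#include <cassert>
#include <cstdint>

// Mask form: clear the low y bits of x with an explicit mask, x & (-1 << y).
static uint32_t mask_form(uint32_t x, uint32_t y) {
  return x & (~UINT32_C(0) << y);
}

// Shift-pair form: shift the low y bits out and back in, (x >> y) << y.
// On RV32 this lowers to just srl + sll, as the mask_pair test below shows.
static uint32_t shift_pair_form(uint32_t x, uint32_t y) {
  return (x >> y) << y;
}

int main() {
  for (uint32_t y = 0; y < 32; ++y)
    assert(mask_form(0xdeadbeefu, y) == shift_pair_form(0xdeadbeefu, y));
  return 0;
}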

3 files changed: +143, -0 lines


llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 9 additions & 0 deletions
@@ -25309,3 +25309,12 @@ ArrayRef<MCPhysReg> RISCVTargetLowering::getRoundingControlRegisters() const {
   }
   return {};
 }
+
+bool RISCVTargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
+  EVT VT = Y.getValueType();
+
+  if (VT.isVector())
+    return false;
+
+  return VT.getSizeInBits() <= Subtarget.getXLen();
+}
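
The predicate above admits only scalar types no wider than XLEN and rejects vectors outright. A quick stand-alone check of that width comparison for the types exercised by the test below (qualifies is an illustrative helper, not LLVM API):

#include <cstdio>

// Mirrors the scalar-width check above: a Bits-bit scalar qualifies when it
// is no wider than XLEN; vector types never reach this comparison.
static bool qualifies(unsigned Bits, unsigned XLen) { return Bits <= XLen; }

int main() {
  // Expected: i32 folds on both targets, i64 only on RV64, i128 on neither,
  // matching the RV32/RV64 CHECK lines in the test file below.
  std::printf("RV32: i32=%d i64=%d i128=%d\n",
              qualifies(32, 32), qualifies(64, 32), qualifies(128, 32));
  std::printf("RV64: i32=%d i64=%d i128=%d\n",
              qualifies(32, 64), qualifies(64, 64), qualifies(128, 64));
  return 0;
}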

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 2 additions & 0 deletions
@@ -465,6 +465,8 @@ class RISCVTargetLowering : public TargetLowering {
 
   ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
 
+  bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
+
   /// Match a mask which "spreads" the leading elements of a vector evenly
   /// across the result. Factor is the spread amount, and Index is the
   /// offset applied.
Lines changed: 132 additions & 0 deletions
@@ -0,0 +1,132 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64-none-elf -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=RV64

define i32 @mask_pair(i32 %x, i32 %y) {
; RV32-LABEL: mask_pair:
; RV32:       # %bb.0:
; RV32-NEXT:    srl a0, a0, a1
; RV32-NEXT:    sll a0, a0, a1
; RV32-NEXT:    ret
;
; RV64-LABEL: mask_pair:
; RV64:       # %bb.0:
; RV64-NEXT:    srlw a0, a0, a1
; RV64-NEXT:    sllw a0, a0, a1
; RV64-NEXT:    ret
  %shl = shl nsw i32 -1, %y
  %and = and i32 %shl, %x
  ret i32 %and
}

define i64 @mask_pair_64(i64 %x, i64 %y) {
; RV32-LABEL: mask_pair_64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a3, -1
; RV32-NEXT:    addi a4, a2, -32
; RV32-NEXT:    sll a3, a3, a2
; RV32-NEXT:    bltz a4, .LBB1_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    mv a2, a3
; RV32-NEXT:    j .LBB1_3
; RV32-NEXT:  .LBB1_2:
; RV32-NEXT:    not a2, a2
; RV32-NEXT:    lui a5, 524288
; RV32-NEXT:    addi a5, a5, -1
; RV32-NEXT:    srl a2, a5, a2
; RV32-NEXT:    or a2, a3, a2
; RV32-NEXT:  .LBB1_3:
; RV32-NEXT:    srai a4, a4, 31
; RV32-NEXT:    and a3, a4, a3
; RV32-NEXT:    and a1, a2, a1
; RV32-NEXT:    and a0, a3, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: mask_pair_64:
; RV64:       # %bb.0:
; RV64-NEXT:    srl a0, a0, a1
; RV64-NEXT:    sll a0, a0, a1
; RV64-NEXT:    ret
  %shl = shl nsw i64 -1, %y
  %and = and i64 %shl, %x
  ret i64 %and
}

define i128 @mask_pair_128(i128 %x, i128 %y) {
; RV32-LABEL: mask_pair_128:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    .cfi_def_cfa_offset 32
; RV32-NEXT:    lw a5, 0(a1)
; RV32-NEXT:    lw a4, 4(a1)
; RV32-NEXT:    lw a3, 8(a1)
; RV32-NEXT:    lw a1, 12(a1)
; RV32-NEXT:    lw a2, 0(a2)
; RV32-NEXT:    li a6, -1
; RV32-NEXT:    sw zero, 0(sp)
; RV32-NEXT:    sw zero, 4(sp)
; RV32-NEXT:    sw zero, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    addi a7, sp, 16
; RV32-NEXT:    sw a6, 16(sp)
; RV32-NEXT:    sw a6, 20(sp)
; RV32-NEXT:    sw a6, 24(sp)
; RV32-NEXT:    sw a6, 28(sp)
; RV32-NEXT:    srli a6, a2, 3
; RV32-NEXT:    andi a6, a6, 12
; RV32-NEXT:    sub a6, a7, a6
; RV32-NEXT:    lw a7, 4(a6)
; RV32-NEXT:    lw t0, 8(a6)
; RV32-NEXT:    lw t1, 12(a6)
; RV32-NEXT:    lw a6, 0(a6)
; RV32-NEXT:    andi t2, a2, 31
; RV32-NEXT:    xori t2, t2, 31
; RV32-NEXT:    sll t1, t1, a2
; RV32-NEXT:    srli t3, t0, 1
; RV32-NEXT:    sll t0, t0, a2
; RV32-NEXT:    srli t4, a7, 1
; RV32-NEXT:    sll a7, a7, a2
; RV32-NEXT:    sll a2, a6, a2
; RV32-NEXT:    srli a6, a6, 1
; RV32-NEXT:    srl t3, t3, t2
; RV32-NEXT:    srl t4, t4, t2
; RV32-NEXT:    srl a6, a6, t2
; RV32-NEXT:    and a2, a2, a5
; RV32-NEXT:    or a5, t1, t3
; RV32-NEXT:    or t0, t0, t4
; RV32-NEXT:    or a6, a7, a6
; RV32-NEXT:    and a4, a6, a4
; RV32-NEXT:    and a3, t0, a3
; RV32-NEXT:    and a1, a5, a1
; RV32-NEXT:    sw a2, 0(a0)
; RV32-NEXT:    sw a4, 4(a0)
; RV32-NEXT:    sw a3, 8(a0)
; RV32-NEXT:    sw a1, 12(a0)
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: mask_pair_128:
; RV64:       # %bb.0:
; RV64-NEXT:    li a5, -1
; RV64-NEXT:    addi a4, a2, -64
; RV64-NEXT:    sll a3, a5, a2
; RV64-NEXT:    bltz a4, .LBB2_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    mv a2, a3
; RV64-NEXT:    j .LBB2_3
; RV64-NEXT:  .LBB2_2:
; RV64-NEXT:    not a2, a2
; RV64-NEXT:    srli a5, a5, 1
; RV64-NEXT:    srl a2, a5, a2
; RV64-NEXT:    or a2, a3, a2
; RV64-NEXT:  .LBB2_3:
; RV64-NEXT:    srai a4, a4, 63
; RV64-NEXT:    and a3, a4, a3
; RV64-NEXT:    and a1, a2, a1
; RV64-NEXT:    and a0, a3, a0
; RV64-NEXT:    ret
  %shl = shl nsw i128 -1, %y
  %and = and i128 %shl, %x
  ret i128 %and
}
