Skip to content

Commit 08d563b

Browse files
committed
8345110: RISC-V: Optimize and clean up byte reverse assembler routines
Reviewed-by: mli, rehn
1 parent 959fa4a commit 08d563b

File tree

4 files changed

+51
-100
lines changed

4 files changed

+51
-100
lines changed

src/hotspot/cpu/riscv/macroAssembler_riscv.cpp

Lines changed: 15 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -2461,41 +2461,6 @@ void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tm
24612461
}
24622462
}
24632463

2464-
2465-
// reverse bytes in halfword in lower 16 bits and sign-extend
2466-
// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits)
2467-
void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) {
2468-
if (UseZbb) {
2469-
rev8(Rd, Rs);
2470-
srai(Rd, Rd, 48);
2471-
return;
2472-
}
2473-
assert_different_registers(Rs, tmp);
2474-
assert_different_registers(Rd, tmp);
2475-
srli(tmp, Rs, 8);
2476-
andi(tmp, tmp, 0xFF);
2477-
slli(Rd, Rs, 56);
2478-
srai(Rd, Rd, 48); // sign-extend
2479-
orr(Rd, Rd, tmp);
2480-
}
2481-
2482-
// reverse bytes in lower word and sign-extend
2483-
// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits)
2484-
void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
2485-
if (UseZbb) {
2486-
rev8(Rd, Rs);
2487-
srai(Rd, Rd, 32);
2488-
return;
2489-
}
2490-
assert_different_registers(Rs, tmp1, tmp2);
2491-
assert_different_registers(Rd, tmp1, tmp2);
2492-
revb_h_w_u(Rd, Rs, tmp1, tmp2);
2493-
slli(tmp2, Rd, 48);
2494-
srai(tmp2, tmp2, 32); // sign-extend
2495-
srli(Rd, Rd, 16);
2496-
orr(Rd, Rd, tmp2);
2497-
}
2498-
24992464
// reverse bytes in halfword in lower 16 bits and zero-extend
25002465
// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
25012466
void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) {
@@ -2532,56 +2497,28 @@ void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Registe
25322497
orr(Rd, Rd, tmp2);
25332498
}
25342499

2535-
// This method is only used for revb_h
2536-
// Rd = Rs[47:0] Rs[55:48] Rs[63:56]
2537-
void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) {
2538-
assert_different_registers(Rs, tmp1, tmp2);
2539-
assert_different_registers(Rd, tmp1);
2540-
srli(tmp1, Rs, 48);
2541-
andi(tmp2, tmp1, 0xFF);
2542-
slli(tmp2, tmp2, 8);
2543-
srli(tmp1, tmp1, 8);
2544-
orr(tmp1, tmp1, tmp2);
2545-
slli(Rd, Rs, 16);
2546-
orr(Rd, Rd, tmp1);
2547-
}
2548-
2549-
// reverse bytes in each halfword
2550-
// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8]
2551-
void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) {
2552-
if (UseZbb) {
2553-
assert_different_registers(Rs, tmp1);
2554-
assert_different_registers(Rd, tmp1);
2555-
rev8(Rd, Rs);
2556-
zero_extend(tmp1, Rd, 32);
2557-
roriw(tmp1, tmp1, 16);
2558-
slli(tmp1, tmp1, 32);
2559-
srli(Rd, Rd, 32);
2560-
roriw(Rd, Rd, 16);
2561-
zero_extend(Rd, Rd, 32);
2562-
orr(Rd, Rd, tmp1);
2563-
return;
2564-
}
2565-
assert_different_registers(Rs, tmp1, tmp2);
2566-
assert_different_registers(Rd, tmp1, tmp2);
2567-
revb_h_helper(Rd, Rs, tmp1, tmp2);
2568-
for (int i = 0; i < 3; ++i) {
2569-
revb_h_helper(Rd, Rd, tmp1, tmp2);
2570-
}
2571-
}
2572-
2573-
// reverse bytes in each word
2574-
// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24]
2500+
// reverse bytes in lower word, sign-extend
2501+
// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits)
25752502
void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
25762503
if (UseZbb) {
25772504
rev8(Rd, Rs);
2578-
rori(Rd, Rd, 32);
2505+
srai(Rd, Rd, 32);
25792506
return;
25802507
}
25812508
assert_different_registers(Rs, tmp1, tmp2);
25822509
assert_different_registers(Rd, tmp1, tmp2);
2583-
revb(Rd, Rs, tmp1, tmp2);
2584-
ror_imm(Rd, Rd, 32);
2510+
andi(tmp1, Rs, 0xFF);
2511+
slli(tmp1, tmp1, 8);
2512+
for (int step = 8; step < 24; step += 8) {
2513+
srli(tmp2, Rs, step);
2514+
andi(tmp2, tmp2, 0xFF);
2515+
orr(tmp1, tmp1, tmp2);
2516+
slli(tmp1, tmp1, 8);
2517+
}
2518+
srli(Rd, Rs, 24);
2519+
andi(Rd, Rd, 0xFF);
2520+
orr(Rd, tmp1, Rd);
2521+
sign_extend(Rd, Rd, 32);
25852522
}
25862523

25872524
// reverse bytes in doubleword

src/hotspot/cpu/riscv/macroAssembler_riscv.hpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -913,13 +913,9 @@ class MacroAssembler: public Assembler {
913913
void orn(Register Rd, Register Rs1, Register Rs2);
914914

915915
// revb
916-
void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend
917-
void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend
918916
void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend
919917
void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend
920-
void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower
921-
void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword
922-
void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word
918+
void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in lower word, sign-extend
923919
void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword
924920

925921
void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0);

src/hotspot/cpu/riscv/riscv_b.ad

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -181,11 +181,15 @@ instruct bytes_reverse_int_b(iRegINoSp dst, iRegIorL2I src) %{
181181
match(Set dst (ReverseBytesI src));
182182

183183
ins_cost(ALU_COST * 2);
184-
format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_b" %}
184+
format %{
185+
"rev8 $dst, $src\t#@bytes_reverse_int_b\t\n"
186+
"srai $dst, $dst, 32\t\n"
187+
%}
185188

186189
ins_encode %{
187190
assert(UseZbb, "must be");
188-
__ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg));
191+
__ rev8(as_Register($dst$$reg), as_Register($src$$reg));
192+
__ srai(as_Register($dst$$reg), as_Register($dst$$reg), 32);
189193
%}
190194

191195
ins_pipe(ialu_reg);
@@ -209,11 +213,15 @@ instruct bytes_reverse_unsigned_short_b(iRegINoSp dst, iRegIorL2I src) %{
209213
match(Set dst (ReverseBytesUS src));
210214

211215
ins_cost(ALU_COST * 2);
212-
format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_b" %}
216+
format %{
217+
"rev8 $dst, $src\t#@bytes_reverse_unsigned_short_b\t\n"
218+
"srli $dst, $dst, 48\t\n"
219+
%}
213220

214221
ins_encode %{
215222
assert(UseZbb, "must be");
216-
__ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg));
223+
__ rev8(as_Register($dst$$reg), as_Register($src$$reg));
224+
__ srli(as_Register($dst$$reg), as_Register($dst$$reg), 48);
217225
%}
218226

219227
ins_pipe(ialu_reg);
@@ -223,11 +231,15 @@ instruct bytes_reverse_short_b(iRegINoSp dst, iRegIorL2I src) %{
223231
match(Set dst (ReverseBytesS src));
224232

225233
ins_cost(ALU_COST * 2);
226-
format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_b" %}
234+
format %{
235+
"rev8 $dst, $src\t#@bytes_reverse_short_b\t\n"
236+
"srai $dst, $dst, 48\t\n"
237+
%}
227238

228239
ins_encode %{
229240
assert(UseZbb, "must be");
230-
__ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg));
241+
__ rev8(as_Register($dst$$reg), as_Register($src$$reg));
242+
__ srai(as_Register($dst$$reg), as_Register($dst$$reg), 48);
231243
%}
232244

233245
ins_pipe(ialu_reg);

src/hotspot/cpu/riscv/templateTable_riscv.cpp

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1621,13 +1621,14 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
16211621

16221622
// load branch displacement
16231623
if (!is_wide) {
1624+
// Convert the 16-bit value into native byte-ordering and sign-extend
16241625
__ lb(x12, at_bcp(1));
16251626
__ lbu(t1, at_bcp(2));
16261627
__ slli(x12, x12, 8);
16271628
__ add(x12, x12, t1);
16281629
} else {
16291630
__ lwu(x12, at_bcp(1));
1630-
__ revb_w_w(x12, x12); // reverse bytes in word and sign-extend
1631+
__ revb_w(x12, x12);
16311632
}
16321633

16331634
// Handle all the JSR stuff here, then exit.
@@ -1892,8 +1893,8 @@ void TemplateTable::tableswitch() {
18921893
// load lo & hi
18931894
__ lwu(x12, Address(x11, BytesPerInt));
18941895
__ lwu(x13, Address(x11, 2 * BytesPerInt));
1895-
__ revb_w_w(x12, x12); // reverse bytes in word (32bit) and sign-extend
1896-
__ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend
1896+
__ revb_w(x12, x12);
1897+
__ revb_w(x13, x13);
18971898
// check against lo & hi
18981899
__ blt(x10, x12, default_case);
18991900
__ bgt(x10, x13, default_case);
@@ -1904,7 +1905,7 @@ void TemplateTable::tableswitch() {
19041905
__ profile_switch_case(x10, x11, x12);
19051906
// continue execution
19061907
__ bind(continue_execution);
1907-
__ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend
1908+
__ revb_w(x13, x13);
19081909
__ add(xbcp, xbcp, x13);
19091910
__ load_unsigned_byte(t0, Address(xbcp));
19101911
__ dispatch_only(vtos, /*generate_poll*/true);
@@ -1924,14 +1925,17 @@ void TemplateTable::fast_linearswitch() {
19241925
transition(itos, vtos);
19251926
Label loop_entry, loop, found, continue_execution;
19261927
// bswap x10 so we can avoid bswapping the table entries
1927-
__ revb_w_w(x10, x10); // reverse bytes in word (32bit) and sign-extend
1928+
__ revb_w(x10, x10);
19281929
// align xbcp
19291930
__ la(x9, at_bcp(BytesPerInt)); // btw: should be able to get rid of
19301931
// this instruction (change offsets
19311932
// below)
19321933
__ andi(x9, x9, -BytesPerInt);
19331934
// set counter
19341935
__ lwu(x11, Address(x9, BytesPerInt));
1936+
// Convert the 32-bit npairs (number of pairs) into native byte-ordering
1937+
// We can use sign-extension here because npairs must be greater than or
1938+
// equal to 0 per JVM spec on 'lookupswitch' bytecode.
19351939
__ revb_w(x11, x11);
19361940
__ j(loop_entry);
19371941
// table search
@@ -1953,7 +1957,7 @@ void TemplateTable::fast_linearswitch() {
19531957
__ profile_switch_case(x11, x10, x9);
19541958
// continue execution
19551959
__ bind(continue_execution);
1956-
__ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend
1960+
__ revb_w(x13, x13);
19571961
__ add(xbcp, xbcp, x13);
19581962
__ lbu(t0, Address(xbcp, 0));
19591963
__ dispatch_only(vtos, /*generate_poll*/true);
@@ -2005,7 +2009,9 @@ void TemplateTable::fast_binaryswitch() {
20052009
__ mv(i, zr); // i = 0
20062010
__ lwu(j, Address(array, -BytesPerInt)); // j = length(array)
20072011

2008-
// Convert j into native byteordering
2012+
// Convert the 32-bit npairs (number of pairs) into native byte-ordering
2013+
// We can use sign-extension here because npairs must be greater than or
2014+
// equal to 0 per JVM spec on 'lookupswitch' bytecode.
20092015
__ revb_w(j, j);
20102016

20112017
// And start
@@ -2024,7 +2030,7 @@ void TemplateTable::fast_binaryswitch() {
20242030
// Convert array[h].match to native byte-ordering before compare
20252031
__ shadd(temp, h, array, temp, 3);
20262032
__ lwu(temp, Address(temp, 0));
2027-
__ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend
2033+
__ revb_w(temp, temp);
20282034

20292035
Label L_done, L_greater;
20302036
__ bge(key, temp, L_greater);
@@ -2047,14 +2053,14 @@ void TemplateTable::fast_binaryswitch() {
20472053
// Convert array[i].match to native byte-ordering before compare
20482054
__ shadd(temp, i, array, temp, 3);
20492055
__ lwu(temp, Address(temp, 0));
2050-
__ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend
2056+
__ revb_w(temp, temp);
20512057
__ bne(key, temp, default_case);
20522058

20532059
// entry found -> j = offset
20542060
__ shadd(temp, i, array, temp, 3);
20552061
__ lwu(j, Address(temp, BytesPerInt));
20562062
__ profile_switch_case(i, key, array);
2057-
__ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend
2063+
__ revb_w(j, j);
20582064

20592065
__ add(temp, xbcp, j);
20602066
__ load_unsigned_byte(t0, Address(temp, 0));
@@ -2067,7 +2073,7 @@ void TemplateTable::fast_binaryswitch() {
20672073
__ bind(default_case);
20682074
__ profile_switch_default(i);
20692075
__ lwu(j, Address(array, -2 * BytesPerInt));
2070-
__ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend
2076+
__ revb_w(j, j);
20712077

20722078
__ add(temp, xbcp, j);
20732079
__ load_unsigned_byte(t0, Address(temp, 0));

0 commit comments

Comments
 (0)