Skip to content

Commit 1c0c36c

Browse files
robehn authored and pull[bot] committed
8340241: RISC-V: Returns mispredicted
Reviewed-by: fyang, luhenry
1 parent c2ab5dc commit 1c0c36c

18 files changed

+171
-132
lines changed

src/hotspot/cpu/riscv/assembler_riscv.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2892,8 +2892,9 @@ enum Nf {
28922892
// Unconditional branch instructions
28932893
// --------------------------
28942894
protected:
2895-
// All calls and jumps must go via MASM.
2895+
// All calls and jumps must go via MASM. Only use x1 (aka ra) as link register for now.
28962896
void jalr(Register Rd, Register Rs, const int32_t offset) {
2897+
assert(Rd != x5 && Rs != x5, "Register x5 must not be used for calls/jumps.");
28972898
/* jalr -> c.jr/c.jalr */
28982899
if (do_compress() && (offset == 0 && Rs != x0)) {
28992900
if (Rd == x1) {
@@ -2908,14 +2909,14 @@ enum Nf {
29082909
}
29092910

29102911
void jal(Register Rd, const int32_t offset) {
2912+
assert(Rd != x5, "Register x5 must not be used for calls/jumps.");
29112913
/* jal -> c.j, note c.jal is RV32C only */
29122914
if (do_compress() &&
29132915
Rd == x0 &&
29142916
is_simm12(offset) && ((offset % 2) == 0)) {
29152917
c_j(offset);
29162918
return;
29172919
}
2918-
29192920
_jal(Rd, offset);
29202921
}
29212922

src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) {
9393
stub_id = C1StubId::throw_range_check_failed_id;
9494
}
9595
// t0 and t1 are used as args in generate_exception_throw,
96-
// so use ra as the tmp register for rt_call.
96+
// so use x1/ra as the tmp register for rt_call.
9797
__ rt_call(Runtime1::entry_for(stub_id), ra);
9898
ce->add_call_info_here(_info);
9999
ce->verify_oop_map(_info);
@@ -275,7 +275,7 @@ void SimpleExceptionStub::emit_code(LIR_Assembler* ce) {
275275
if (_obj->is_cpu_register()) {
276276
__ mv(t0, _obj->as_register());
277277
}
278-
__ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), t1);
278+
__ far_call(RuntimeAddress(Runtime1::entry_for(_stub)));
279279
ce->add_call_info_here(_info);
280280
debug_only(__ should_not_reach_here());
281281
}

src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -271,8 +271,8 @@ static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStub
271271
__ mv(c_rarg0, arg);
272272
}
273273
__ mv(c_rarg1, xthread);
274-
__ mv(t0, runtime_path);
275-
__ jalr(t0);
274+
__ mv(t1, runtime_path);
275+
__ jalr(t1);
276276
}
277277

278278
void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,

src/hotspot/cpu/riscv/gc/x/xBarrierSetAssembler_riscv.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -339,8 +339,8 @@ void XBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, X
339339
XSaveLiveRegisters save_live_registers(masm, stub);
340340
XSetupArguments setup_arguments(masm, stub);
341341

342-
__ mv(t0, stub->slow_path());
343-
__ jalr(t0);
342+
__ mv(t1, stub->slow_path());
343+
__ jalr(t1);
344344
}
345345

346346
// Stub exit

src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -724,8 +724,8 @@ void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, Z
724724
{
725725
SaveLiveRegisters save_live_registers(masm, stub);
726726
ZSetupArguments setup_arguments(masm, stub);
727-
__ mv(t0, stub->slow_path());
728-
__ jalr(t0);
727+
__ mv(t1, stub->slow_path());
728+
__ jalr(t1);
729729
}
730730

731731
// Stub exit
@@ -758,15 +758,15 @@ void ZBarrierSetAssembler::generate_c2_store_barrier_stub(MacroAssembler* masm,
758758
__ la(c_rarg0, stub->ref_addr());
759759

760760
if (stub->is_native()) {
761-
__ la(t0, RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_native_oop_field_without_healing_addr()));
761+
__ la(t1, RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_native_oop_field_without_healing_addr()));
762762
} else if (stub->is_atomic()) {
763-
__ la(t0, RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_oop_field_with_healing_addr()));
763+
__ la(t1, RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_oop_field_with_healing_addr()));
764764
} else if (stub->is_nokeepalive()) {
765-
__ la(t0, RuntimeAddress(ZBarrierSetRuntime::no_keepalive_store_barrier_on_oop_field_without_healing_addr()));
765+
__ la(t1, RuntimeAddress(ZBarrierSetRuntime::no_keepalive_store_barrier_on_oop_field_without_healing_addr()));
766766
} else {
767-
__ la(t0, RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_oop_field_without_healing_addr()));
767+
__ la(t1, RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_oop_field_without_healing_addr()));
768768
}
769-
__ jalr(t0);
769+
__ jalr(t1);
770770
}
771771

772772
// Stub exit

src/hotspot/cpu/riscv/interp_masm_riscv.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -421,13 +421,13 @@ void InterpreterMacroAssembler::jump_from_interpreted(Register method) {
421421
// interp_only_mode if these events CAN be enabled.
422422
lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset()));
423423
beqz(t0, run_compiled_code);
424-
ld(t0, Address(method, Method::interpreter_entry_offset()));
425-
jr(t0);
424+
ld(t1, Address(method, Method::interpreter_entry_offset()));
425+
jr(t1);
426426
bind(run_compiled_code);
427427
}
428428

429-
ld(t0, Address(method, Method::from_interpreted_offset()));
430-
jr(t0);
429+
ld(t1, Address(method, Method::from_interpreted_offset()));
430+
jr(t1);
431431
}
432432

433433
// The following two routines provide a hook so that an implementation

src/hotspot/cpu/riscv/macroAssembler_riscv.cpp

Lines changed: 31 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -457,8 +457,8 @@ void MacroAssembler::call_VM_base(Register oop_result,
457457
RuntimeAddress target(StubRoutines::forward_exception_entry());
458458
relocate(target.rspec(), [&] {
459459
int32_t offset;
460-
la(t0, target.target(), offset);
461-
jr(t0, offset);
460+
la(t1, target.target(), offset);
461+
jr(t1, offset);
462462
});
463463
bind(ok);
464464
}
@@ -760,21 +760,21 @@ void MacroAssembler::emit_static_call_stub() {
760760

761761
// Jump to the entry point of the c2i stub.
762762
int32_t offset = 0;
763-
movptr(t0, 0, offset, t1); // lui + lui + slli + add
764-
jr(t0, offset);
763+
movptr(t1, 0, offset, t0); // lui + lui + slli + add
764+
jr(t1, offset);
765765
}
766766

767767
void MacroAssembler::call_VM_leaf_base(address entry_point,
768768
int number_of_arguments,
769769
Label *retaddr) {
770770
int32_t offset = 0;
771-
push_reg(RegSet::of(t0, xmethod), sp); // push << t0 & xmethod >> to sp
772-
mv(t0, entry_point, offset);
773-
jalr(t0, offset);
771+
push_reg(RegSet::of(t1, xmethod), sp); // push << t1 & xmethod >> to sp
772+
mv(t1, entry_point, offset);
773+
jalr(t1, offset);
774774
if (retaddr != nullptr) {
775775
bind(*retaddr);
776776
}
777-
pop_reg(RegSet::of(t0, xmethod), sp); // pop << t0 & xmethod >> from sp
777+
pop_reg(RegSet::of(t1, xmethod), sp); // pop << t1 & xmethod >> from sp
778778
}
779779

780780
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
@@ -941,6 +941,7 @@ void MacroAssembler::li(Register Rd, int64_t imm) {
941941

942942
void MacroAssembler::load_link_jump(const address source, Register temp) {
943943
assert(temp != noreg && temp != x0, "expecting a register");
944+
assert(temp != x5, "temp register must not be x5.");
944945
assert_cond(source != nullptr);
945946
int64_t distance = source - pc();
946947
assert(is_simm32(distance), "Must be");
@@ -968,7 +969,8 @@ void MacroAssembler::j(const address dest, Register temp) {
968969
if (is_simm21(distance) && ((distance % 2) == 0)) {
969970
Assembler::jal(x0, distance);
970971
} else {
971-
assert(temp != noreg && temp != x0, "expecting a register");
972+
assert(temp != noreg && temp != x0, "Expecting a register");
973+
assert(temp != x1 && temp != x5, "temp register must not be x1/x5.");
972974
int32_t offset = 0;
973975
la(temp, dest, offset);
974976
jr(temp, offset);
@@ -1006,23 +1008,27 @@ void MacroAssembler::j(Label &lab, Register temp) {
10061008

10071009
void MacroAssembler::jr(Register Rd, int32_t offset) {
10081010
assert(Rd != noreg, "expecting a register");
1011+
assert(Rd != x1 && Rd != x5, "Rd register must not be x1/x5.");
10091012
Assembler::jalr(x0, Rd, offset);
10101013
}
10111014

10121015
void MacroAssembler::call(const address dest, Register temp) {
10131016
assert_cond(dest != nullptr);
10141017
assert(temp != noreg, "expecting a register");
1018+
assert(temp != x5, "temp register must not be x5.");
10151019
int32_t offset = 0;
10161020
la(temp, dest, offset);
10171021
jalr(temp, offset);
10181022
}
10191023

10201024
void MacroAssembler::jalr(Register Rs, int32_t offset) {
10211025
assert(Rs != noreg, "expecting a register");
1026+
assert(Rs != x5, "Rs register must not be x5.");
10221027
Assembler::jalr(x1, Rs, offset);
10231028
}
10241029

10251030
void MacroAssembler::rt_call(address dest, Register tmp) {
1031+
assert(tmp != x5, "tmp register must not be x5.");
10261032
CodeBlob *cb = CodeCache::find_blob(dest);
10271033
RuntimeAddress target(dest);
10281034
if (cb) {
@@ -1762,7 +1768,7 @@ void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_byte
17621768

17631769
static int patch_offset_in_jal(address branch, int64_t offset) {
17641770
assert(Assembler::is_simm21(offset) && ((offset % 2) == 0),
1765-
"offset is too large to be patched in one jal instruction!\n");
1771+
"offset (%ld) is too large to be patched in one jal instruction!\n", offset);
17661772
Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31]
17671773
Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21]
17681774
Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20]
@@ -3658,6 +3664,7 @@ void MacroAssembler::far_jump(const Address &entry, Register tmp) {
36583664
}
36593665

36603666
void MacroAssembler::far_call(const Address &entry, Register tmp) {
3667+
assert(tmp != x5, "tmp register must not be x5.");
36613668
assert(CodeCache::find_blob(entry.target()) != nullptr,
36623669
"destination of far call not found in code cache");
36633670
assert(entry.rspec().type() == relocInfo::external_word_type
@@ -4072,7 +4079,7 @@ void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass,
40724079
Register tmp1,
40734080
Register tmp2,
40744081
Register tmp3) {
4075-
assert_different_registers(r_sub_klass, r_super_klass, tmp1, tmp2, tmp3, result, t0);
4082+
assert_different_registers(r_sub_klass, r_super_klass, tmp1, tmp2, tmp3, result, t0, t1);
40764083

40774084
const Register
40784085
r_array_base = tmp1, // X11
@@ -4139,8 +4146,8 @@ void MacroAssembler::get_thread(Register thread) {
41394146
RegSet::range(x28, x31) + ra - thread;
41404147
push_reg(saved_regs, sp);
41414148

4142-
mv(ra, CAST_FROM_FN_PTR(address, Thread::current));
4143-
jalr(ra);
4149+
mv(t1, CAST_FROM_FN_PTR(address, Thread::current));
4150+
jalr(t1);
41444151
if (thread != c_rarg0) {
41454152
mv(thread, c_rarg0);
41464153
}
@@ -4187,8 +4194,8 @@ void MacroAssembler::reserved_stack_check() {
41874194
// We have already removed our own frame.
41884195
// throw_delayed_StackOverflowError will think that it's been
41894196
// called by our caller.
4190-
la(t0, RuntimeAddress(SharedRuntime::throw_delayed_StackOverflowError_entry()));
4191-
jr(t0);
4197+
la(t1, RuntimeAddress(SharedRuntime::throw_delayed_StackOverflowError_entry()));
4198+
jr(t1);
41924199
should_not_reach_here();
41934200

41944201
bind(no_reserved_zone_enabling);
@@ -4299,7 +4306,7 @@ address MacroAssembler::load_and_call(Address entry) {
42994306
}
43004307
#endif
43014308
relocate(entry.rspec(), [&] {
4302-
load_link_jump(target);
4309+
load_link_jump(target, t1);
43034310
});
43044311

43054312
postcond(pc() != badAddress);
@@ -4309,7 +4316,7 @@ address MacroAssembler::load_and_call(Address entry) {
43094316
address MacroAssembler::ic_call(address entry, jint method_index) {
43104317
RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
43114318
IncompressibleRegion ir(this); // relocations
4312-
movptr(t1, (address)Universe::non_oop_word(), t0);
4319+
movptr(t0, (address)Universe::non_oop_word(), t1);
43134320
assert_cond(entry != nullptr);
43144321
return reloc_call(Address(entry, rh));
43154322
}
@@ -4323,9 +4330,9 @@ int MacroAssembler::ic_check_size() {
43234330
int MacroAssembler::ic_check(int end_alignment) {
43244331
IncompressibleRegion ir(this);
43254332
Register receiver = j_rarg0;
4326-
Register data = t1;
4333+
Register data = t0;
43274334

4328-
Register tmp1 = t0; // t0 always scratch
4335+
Register tmp1 = t1; // scratch
43294336
// t2 is saved on call, thus should have been saved before this check.
43304337
// Hence we can clobber it.
43314338
Register tmp2 = t2;
@@ -4423,8 +4430,8 @@ address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
44234430
// - load the call
44244431
// - call
44254432
Label target;
4426-
ld(t0, target); // auipc + ld
4427-
jr(t0); // jalr
4433+
ld(t1, target); // auipc + ld
4434+
jr(t1); // jalr
44284435
bind(target);
44294436
assert(offset() - stub_start_offset == MacroAssembler::NativeShortCall::trampoline_data_offset,
44304437
"should be");
@@ -5148,11 +5155,11 @@ const int MacroAssembler::zero_words_block_size = 8;
51485155
// ptr: Address of a buffer to be zeroed.
51495156
// cnt: Count in HeapWords.
51505157
//
5151-
// ptr, cnt, and t0 are clobbered.
5158+
// ptr, cnt, t1, and t0 are clobbered.
51525159
address MacroAssembler::zero_words(Register ptr, Register cnt) {
51535160
assert(is_power_of_2(zero_words_block_size), "adjust this");
51545161
assert(ptr == x28 && cnt == x29, "mismatch in register usage");
5155-
assert_different_registers(cnt, t0);
5162+
assert_different_registers(cnt, t0, t1);
51565163

51575164
BLOCK_COMMENT("zero_words {");
51585165

@@ -5170,6 +5177,7 @@ address MacroAssembler::zero_words(Register ptr, Register cnt) {
51705177
return nullptr;
51715178
}
51725179
} else {
5180+
// Clobbers t1
51735181
rt_call(zero_blocks.target());
51745182
}
51755183
}

src/hotspot/cpu/riscv/macroAssembler_riscv.hpp

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -627,34 +627,59 @@ class MacroAssembler: public Assembler {
627627
void bgtz(Register Rs, const address dest);
628628

629629
private:
630-
void load_link_jump(const address source, Register temp = t0);
630+
void load_link_jump(const address source, Register temp);
631631
void jump_link(const address dest, Register temp);
632632
public:
633633
// We try to follow RISC-V asm mnemonics.
634634
// But as we don't layout a reachable GOT,
635635
// we often need to resort to movptr, li <48imm>.
636636
// https://github.com/riscv-non-isa/riscv-asm-manual/blob/master/riscv-asm.md
637637

638+
// HotSpot only uses the standard calling convention using x1/ra.
639+
// The alternative calling convention using x5/t0 is not used.
640+
// Using x5 as a temp causes the CPU to mispredict returns.
641+
642+
// JALR, return address stack updates:
643+
// | rd is x1/x5 | rs1 is x1/x5 | rd=rs1 | RAS action
644+
// | ----------- | ------------ | ------ |-------------
645+
// | No | No | — | None
646+
// | No | Yes | — | Pop
647+
// | Yes | No | — | Push
648+
// | Yes | Yes | No | Pop, then push
649+
// | Yes | Yes | Yes | Push
650+
//
651+
// JAL, return address stack updates:
652+
// | rd is x1/x5 | RAS action
653+
// | ----------- | ----------
654+
// | Yes | Push
655+
// | No | None
656+
//
657+
// JUMPS use Rd = x0/zero and Rs = x6/t1 or imm
658+
// CALLS use Rd = x1/ra and Rs = x6/t1 or imm (or x1/ra*)
659+
// RETURNS use Rd = x0/zero and Rs = x1/ra
660+
// *x1/ra should not normally be used as Rs; special case only.
661+
638662
// jump: jal x0, offset
639663
// For long reach uses temp register for:
640664
// la + jr
641-
void j(const address dest, Register temp = t0);
642-
void j(const Address &adr, Register temp = t0);
643-
void j(Label &l, Register temp = t0);
665+
void j(const address dest, Register temp = t1);
666+
void j(const Address &adr, Register temp = t1);
667+
void j(Label &l, Register temp = noreg);
644668

645669
// jump register: jalr x0, offset(rs)
646670
void jr(Register Rd, int32_t offset = 0);
647671

648672
// call: la + jalr x1
649-
void call(const address dest, Register temp = t0);
673+
void call(const address dest, Register temp = t1);
650674

651675
// jalr: jalr x1, offset(rs)
652676
void jalr(Register Rs, int32_t offset = 0);
653677

654678
// Emit a runtime call. Only invalidates the tmp register which
655679
// is used to keep the entry address for jalr/movptr.
656680
// Uses call() for intra code cache, else movptr + jalr.
657-
void rt_call(address dest, Register tmp = t0);
681+
// Clobbers t1
682+
void rt_call(address dest, Register tmp = t1);
658683

659684
// ret: jalr x0, 0(x1)
660685
inline void ret() {
@@ -1165,8 +1190,9 @@ class MacroAssembler: public Assembler {
11651190
// - relocInfo::external_word_type
11661191
// - relocInfo::runtime_call_type
11671192
// - relocInfo::none
1168-
void far_call(const Address &entry, Register tmp = t0);
1169-
void far_jump(const Address &entry, Register tmp = t0);
1193+
// Clobbers t1 by default.
1194+
void far_call(const Address &entry, Register tmp = t1);
1195+
void far_jump(const Address &entry, Register tmp = t1);
11701196

11711197
static int far_branch_size() {
11721198
return 2 * 4; // auipc + jalr, see far_call() & far_jump()

0 commit comments

Comments
 (0)