Skip to content

Commit bffe0d4

Browse files
committed
WIP: AOSCOS: LOONGARCH: Port of JEP 491: Synchronize Virtual Threads without Pinning
Fixes: 78b8015 ("8338383: Implement JEP 491: Synchronize Virtual Threads without Pinning") Fixes: c113f82 ("8343957: Rename ObjectMonitor::owner_from() and JavaThread::_lock_id") Follow-up: 78b8015 ("8338383: Implement JEP 491: Synchronize Virtual Threads without Pinning") Follow-up: c113f82 ("8343957: Rename ObjectMonitor::owner_from() and JavaThread::_lock_id") Link: openjdk#21565 Link: openjdk#22524 Signed-off-by: Bingwu Zhang <[email protected]>
1 parent b73aa4f commit bffe0d4

19 files changed

+459
-88
lines changed

src/hotspot/cpu/loongarch/assembler_loongarch.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ constexpr Register LVP = S7;
9494
// temporary callee saved register
9595
constexpr Register TSR = S2;
9696

97+
// java thread pointer
9798
constexpr Register TREG = S6;
9899

99100
constexpr Register S5_heapbase = S5;

src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
320320
relocInfo::static_call_type);
321321
address call = __ trampoline_call(resolve);
322322
if (call == nullptr) {
323-
ce->bailout("trampoline stub overflow");
323+
ce->bailout("reloc call address stub overflow");
324324
return;
325325
}
326326
ce->add_call_info_here(info());

src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2267,7 +2267,7 @@ void LIR_Assembler::align_call(LIR_Code code) {}
22672267
void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
22682268
address call = __ trampoline_call(AddressLiteral(op->addr(), rtype));
22692269
if (call == nullptr) {
2270-
bailout("trampoline stub overflow");
2270+
bailout("reloc call address stub overflow");
22712271
return;
22722272
}
22732273
add_call_info(code_offset(), op->info());
@@ -2277,7 +2277,7 @@ void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
22772277
void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
22782278
address call = __ ic_call(op->addr());
22792279
if (call == nullptr) {
2280-
bailout("trampoline stub overflow");
2280+
bailout("reloc call address stub overflow");
22812281
return;
22822282
}
22832283
add_call_info(code_offset(), op->info());

src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) {
4141
const int aligned_mask = BytesPerWord -1;
4242
const int hdr_offset = oopDesc::mark_offset_in_bytes();
43-
assert_different_registers(hdr, obj, disp_hdr);
43+
assert_different_registers(hdr, obj, disp_hdr, T0);
4444
int null_check_offset = -1;
4545

4646
verify_oop(obj);
@@ -96,15 +96,15 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr
9696
bnez(hdr, slow_case);
9797
// done
9898
bind(done);
99+
inc_held_monitor_count(T0);
99100
}
100-
increment(Address(TREG, JavaThread::held_monitor_count_offset()), 1);
101101
return null_check_offset;
102102
}
103103

104104
void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) {
105105
const int aligned_mask = BytesPerWord -1;
106106
const int hdr_offset = oopDesc::mark_offset_in_bytes();
107-
assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different");
107+
assert_different_registers(hdr, obj, disp_hdr, T0);
108108
Label done;
109109

110110
if (LockingMode != LM_LIGHTWEIGHT) {
@@ -134,8 +134,8 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_
134134
}
135135
// done
136136
bind(done);
137+
dec_held_monitor_count(T0);
137138
}
138-
decrement(Address(TREG, JavaThread::held_monitor_count_offset()), 1);
139139
}
140140

141141
// Defines obj, preserves var_size_in_bytes

src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -160,15 +160,15 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result,
160160
}
161161

162162
enum return_state_t {
163-
does_not_return, requires_return
163+
does_not_return, requires_return, requires_pop_epilogue_return
164164
};
165165

166166
// Implementation of StubFrame
167167

168168
class StubFrame: public StackObj {
169169
private:
170170
StubAssembler* _sasm;
171-
bool _return_state;
171+
return_state_t _return_state;
172172

173173
public:
174174
StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments,
@@ -183,8 +183,16 @@ void StubAssembler::prologue(const char* name, bool must_gc_arguments) {
183183
enter();
184184
}
185185

186-
void StubAssembler::epilogue() {
187-
leave();
186+
void StubAssembler::epilogue(bool use_pop) {
187+
// Avoid using a leave instruction when this frame may
188+
// have been frozen, since the current value of fp
189+
// restored from the stub would be invalid. We still
190+
// must restore the fp value saved on enter though.
191+
if (use_pop) {
192+
pop2(RA, FP);
193+
} else {
194+
leave();
195+
}
188196
jr(RA);
189197
}
190198

@@ -204,11 +212,12 @@ void StubFrame::load_argument(int offset_in_words, Register reg) {
204212
}
205213

206214
StubFrame::~StubFrame() {
207-
if (_return_state == requires_return) {
208-
__ epilogue();
209-
} else {
215+
if (_return_state == does_not_return) {
210216
__ should_not_reach_here();
217+
} else {
218+
__ epilogue(_return_state == requires_pop_epilogue_return);
211219
}
220+
_sasm = nullptr;
212221
}
213222

214223
#undef __
@@ -256,6 +265,9 @@ static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) {
256265
}
257266
}
258267

268+
int sp_offset = cpu_reg_save_offsets[TREG->encoding()];
269+
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), TREG->as_VMReg());
270+
259271
if (save_fpu_registers) {
260272
for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
261273
FloatRegister r = as_FloatRegister(i);
@@ -333,6 +345,16 @@ void Runtime1::initialize_pd() {
333345
}
334346
}
335347

348+
// return: offset in 64-bit words.
349+
uint Runtime1::runtime_blob_current_thread_offset(frame f) {
350+
CodeBlob* cb = f.cb();
351+
assert(cb == Runtime1::blob_for(C1StubId::monitorenter_id) ||
352+
cb == Runtime1::blob_for(C1StubId::monitorenter_nofpu_id), "must be");
353+
assert(cb != nullptr && cb->is_runtime_stub(), "invalid frame");
354+
int offset = cpu_reg_save_offsets[TREG->encoding()];
355+
return offset / 2; // SP offsets are in halfwords
356+
}
357+
336358
// target: the entry point of the method that creates and posts the exception oop
337359
// has_argument: true if the exception needs arguments (passed in SCR1 and SCR2)
338360

@@ -860,7 +882,7 @@ OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) {
860882
// fall through
861883
case C1StubId::monitorenter_id:
862884
{
863-
StubFrame f(sasm, "monitorenter", dont_gc_arguments);
885+
StubFrame f(sasm, "monitorenter", dont_gc_arguments, requires_pop_epilogue_return);
864886
OopMap* map = save_live_registers(sasm, save_fpu_registers);
865887

866888
// Called with store_parameter and not C abi

src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.cpp

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,13 @@
4343

4444
// using the cr register as the bool result: 0 for failure; any non-zero value for success.
4545
void C2_MacroAssembler::fast_lock_c2(Register oop, Register box, Register flag,
46-
Register disp_hdr, Register tmp) {
46+
Register disp_hdr, Register tmp, Register tmp1) {
4747
Label cont;
4848
Label object_has_monitor;
4949
Label count, no_count;
5050

5151
assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_lock_lightweight");
52-
assert_different_registers(oop, box, tmp, disp_hdr, flag);
52+
assert_different_registers(oop, box, tmp, tmp1, disp_hdr, flag);
5353

5454
// Load markWord from object into displaced_header.
5555
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
@@ -91,7 +91,7 @@ void C2_MacroAssembler::fast_lock_c2(Register oop, Register box, Register flag,
9191
sub_d(disp_hdr, tmp, SP);
9292
li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markWord::lock_mask_in_place));
9393
// If (mark & lock_mask) == 0 and mark - sp < page_size,
94-
// we are stack-locking and goto cont,
94+
// we are stack-locking and goto cont,
9595
// hence we can store 0 as the displaced header in the box,
9696
// which indicates that it is a recursive lock.
9797
andr(tmp, disp_hdr, tmp);
@@ -103,12 +103,11 @@ void C2_MacroAssembler::fast_lock_c2(Register oop, Register box, Register flag,
103103
// Handle existing monitor.
104104
bind(object_has_monitor);
105105

106-
// The object's monitor m is unlocked if m->owner is null,
107-
// otherwise m->owner may contain a thread or a stack address.
108-
//
109-
// Try to CAS m->owner from null to current thread.
106+
// Try to CAS owner (no owner => current thread's _monitor_owner_id).
110107
move(AT, R0);
111108
addi_d(tmp, disp_hdr, in_bytes(ObjectMonitor::owner_offset()) - markWord::monitor_value);
109+
Register tid = tmp1;
110+
ld_d(tid, Address(TREG, JavaThread::monitor_owner_id_offset()));
112111
cmpxchg(Address(tmp, 0), AT, TREG, flag, true, true /* acquire */);
113112

114113
// Store a non-null value into the box to avoid looking like a re-entrant
@@ -120,7 +119,7 @@ void C2_MacroAssembler::fast_lock_c2(Register oop, Register box, Register flag,
120119

121120
bnez(flag, cont); // CAS success means locking succeeded
122121

123-
bne(AT, TREG, cont); // Check for recursive locking
122+
bne(AT, tid, cont); // Check for recursive locking
124123

125124
// Recursive lock case
126125
li(flag, 1);
@@ -132,7 +131,9 @@ void C2_MacroAssembler::fast_lock_c2(Register oop, Register box, Register flag,
132131
beqz(flag, no_count);
133132

134133
bind(count);
135-
increment(Address(TREG, JavaThread::held_monitor_count_offset()), 1);
134+
if (LockingMode == LM_LEGACY) {
135+
inc_held_monitor_count(T0);
136+
}
136137

137138
bind(no_count);
138139
}
@@ -231,14 +232,16 @@ void C2_MacroAssembler::fast_unlock_c2(Register oop, Register box, Register flag
231232
beqz(flag, no_count);
232233

233234
bind(count);
234-
decrement(Address(TREG, JavaThread::held_monitor_count_offset()), 1);
235+
if (LockingMode == LM_LEGACY) {
236+
dec_held_monitor_count(T0);
237+
}
235238

236239
bind(no_count);
237240
}
238241

239-
void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Register flag, Register tmp1, Register tmp2, Register tmp3) {
242+
void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Register flag, Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
240243
assert(LockingMode == LM_LIGHTWEIGHT, "must be");
241-
assert_different_registers(obj, box, tmp1, tmp2, tmp3, flag);
244+
assert_different_registers(obj, box, tmp1, tmp2, tmp3, tmp4, flag);
242245

243246
// Handle inflated monitor.
244247
Label inflated;
@@ -306,6 +309,7 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Regist
306309
bind(inflated);
307310

308311
const Register tmp1_monitor = tmp1;
312+
309313
if (!UseObjectMonitorTable) {
310314
assert(tmp1_monitor == tmp1_mark, "should be the same here");
311315
} else {
@@ -353,12 +357,14 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Regist
353357
lea(tmp2_owner_addr, owner_address);
354358

355359
move(tmp3_owner, R0);
356-
// CAS owner (null => current thread).
357-
cmpxchg(Address(tmp2_owner_addr, 0), tmp3_owner, TREG, flag, true, true /* acquire */);
360+
// Try to CAS owner (no owner => current thread's _monitor_owner_id).
361+
Register tid = tmp4;
362+
ld_d(tid, Address(TREG, JavaThread::monitor_owner_id_offset()));
363+
cmpxchg(Address(tmp2_owner_addr, 0), tmp3_owner, tid, flag, true, true /* acquire */);
358364
bnez(flag, locked);
359365

360366
// Check if recursive.
361-
bne(tmp3_owner, TREG, slow_path);
367+
bne(tmp3_owner, tid, slow_path);
362368

363369
// Recursive.
364370
increment(recursions_address, 1);
@@ -372,7 +378,6 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Regist
372378
bind(locked);
373379
// Set flag != 0
374380
li(flag, 1);
375-
increment(Address(TREG, JavaThread::held_monitor_count_offset()), 1);
376381

377382
#ifdef ASSERT
378383
// Check that locked label is reached with flag != 0.
@@ -535,7 +540,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, Regi
535540
bind(unlocked);
536541
// Set flag != 0
537542
li(flag, 1);
538-
decrement(Address(TREG, JavaThread::held_monitor_count_offset()), 1);
539543

540544
#ifdef ASSERT
541545
// Check that unlocked label is reached with flag != 0.

src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,12 @@
3434
void cmp_branchEqNe_off21(int flag, Register op1, Label& L);
3535

3636
void fast_lock_c2(Register oop, Register box, Register flag,
37-
Register disp_hdr, Register tmp);
37+
Register disp_hdr, Register tmp, Register tmp1);
3838
void fast_unlock_c2(Register oop, Register box, Register flag,
3939
Register disp_hdr, Register tmp);
4040
// Code used by cmpFastLockLightweight and cmpFastUnlockLightweight mach instructions in .ad file.
4141
void fast_lock_lightweight(Register object, Register box, Register flag,
42-
Register tmp1, Register tmp2, Register tmp3);
42+
Register tmp1, Register tmp2, Register tmp3, Register tmp4);
4343
void fast_unlock_lightweight(Register object, Register box, Register flag,
4444
Register tmp1, Register tmp2, Register tmp3);
4545

src/hotspot/cpu/loongarch/continuationFreezeThaw_loongarch.inline.hpp

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,11 @@ void FreezeBase::adjust_interpreted_frame_unextended_sp(frame& f) {
128128
}
129129
}
130130

131+
inline void FreezeBase::prepare_freeze_interpreted_top_frame(frame& f) {
132+
assert(f.interpreter_frame_last_sp() == nullptr, "should be null for top frame");
133+
f.interpreter_frame_set_last_sp(f.unextended_sp());
134+
}
135+
131136
inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, const frame& hf) {
132137
assert(hf.fp() == hf.unextended_sp() + (f.fp() - f.unextended_sp()), "");
133138
assert((f.at(frame::interpreter_frame_last_sp_offset) != 0)
@@ -191,7 +196,8 @@ inline void Thaw<ConfigT>::patch_caller_links(intptr_t* sp, intptr_t* bottom) {
191196

192197
inline frame ThawBase::new_entry_frame() {
193198
intptr_t* sp = _cont.entrySP();
194-
return frame(sp, sp, _cont.entryFP(), _cont.entryPC()); // TODO PERF: This finds code blob and computes deopt state
199+
// TODO PERF: This finds code blob and computes deopt state
200+
return frame(sp, sp, _cont.entryFP(), _cont.entryPC());
195201
}
196202

197203
template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame& caller, bool bottom) {
@@ -203,7 +209,6 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
203209
// If caller is interpreted it already made room for the callee arguments
204210
int overlap = caller.is_interpreted_frame() ? ContinuationHelper::InterpretedFrame::stack_argsize(hf) : 0;
205211
const int fsize = (int)(ContinuationHelper::InterpretedFrame::frame_bottom(hf) - hf.unextended_sp() - overlap);
206-
const int locals = hf.interpreter_frame_method()->max_locals();
207212
intptr_t* frame_sp = caller.unextended_sp() - fsize;
208213
intptr_t* fp = frame_sp + (hf.fp() - heap_sp);
209214
DEBUG_ONLY(intptr_t* unextended_sp = fp + *hf.addr_at(frame::interpreter_frame_last_sp_offset);)
@@ -219,7 +224,7 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
219224
int fsize = FKind::size(hf);
220225
intptr_t* frame_sp = caller.unextended_sp() - fsize;
221226
if (bottom || caller.is_interpreted_frame()) {
222-
int argsize = hf.compiled_frame_stack_argsize();
227+
int argsize = FKind::stack_argsize(hf);
223228

224229
fsize += argsize;
225230
frame_sp -= argsize;
@@ -234,11 +239,13 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
234239
intptr_t* fp;
235240
if (PreserveFramePointer) {
236241
// we need to recreate a "real" frame pointer, pointing into the stack
237-
fp = frame_sp + FKind::size(hf) - 2;
242+
fp = frame_sp + FKind::size(hf) - frame::sender_sp_offset;
238243
} else {
239-
fp = FKind::stub
240-
? frame_sp + fsize - 2 // this value is used for the safepoint stub
241-
: *(intptr_t**)(hf.sp() - 2); // we need to re-read fp because it may be an oop and we might have fixed the frame.
244+
fp = FKind::stub || FKind::native
245+
// fp always points to the address above the pushed return pc. We need the correct address.
246+
? frame_sp + fsize - frame::sender_sp_offset
247+
// we need to re-read fp because it may be an oop and we might have fixed the frame.
248+
: *(intptr_t**)(hf.sp() - 2);
242249
}
243250
return frame(frame_sp, frame_sp, fp, hf.pc(), hf.cb(), hf.oop_map(), false); // TODO PERF : this computes deopt state; is it necessary?
244251
}
@@ -261,6 +268,22 @@ inline void ThawBase::patch_pd(frame& f, const frame& caller) {
261268
patch_callee_link(caller, caller.fp());
262269
}
263270

271+
inline void ThawBase::patch_pd(frame& f, intptr_t* caller_sp) {
272+
intptr_t* fp = caller_sp - frame::sender_sp_offset;
273+
patch_callee_link(f, fp);
274+
}
275+
276+
inline intptr_t* ThawBase::push_cleanup_continuation() {
277+
frame enterSpecial = new_entry_frame();
278+
intptr_t* sp = enterSpecial.sp();
279+
280+
sp[-1] = (intptr_t)ContinuationEntry::cleanup_pc();
281+
sp[-2] = (intptr_t)enterSpecial.fp();
282+
283+
log_develop_trace(continuations, preempt)("push_cleanup_continuation initial sp: " INTPTR_FORMAT " final sp: " INTPTR_FORMAT, p2i(sp + 2 * frame::metadata_words), p2i(sp));
284+
return sp;
285+
}
286+
264287
inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, const frame& f) {
265288
// Make sure that last_sp is kept relativized.
266289
assert((intptr_t*)f.at_relative(frame::interpreter_frame_last_sp_offset) == f.unextended_sp(), "");

0 commit comments

Comments
 (0)