From 7c5ebae7ac4f60bf7b1c5eddfeaab855d7d1dced Mon Sep 17 00:00:00 2001 From: Bernhard Urban-Forster Date: Tue, 5 Oct 2021 22:17:56 +0200 Subject: [PATCH 1/5] JDK-8274795: avoid spilling and restoring r18 in macro assembler r18 should not be used as it is reserved as platform register. Linux is fine with userspace using it, but Windows and also recently macOS ( https://github.com/openjdk/jdk11u-dev/pull/301#issuecomment-911998917 ) are actually using it on the kernel side. The macro assembler uses the bit pattern `0x7fffffff` (== `r0-r30`) to specify which registers to spill; fortunately this helper is only used here: https://github.com/openjdk/jdk/blob/c05dc268acaf87236f30cf700ea3ac778e3b20e5/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp#L1400-L1404 I haven't seen causing this particular instance any issues in practice _yet_, presumably because it looks hard to align the stars in order to trigger a problem (between stp and ldp of r18 a transition to kernel space must happen *and* the kernel needs to do something with r18). But jdk11u-dev has more usages of the `::pusha`/`::popa` macro and that causes troubles as explained in the link above. Output of `-XX:+PrintInterpreter` before this change: ``` ---------------------------------------------------------------------- method entry point (kind = native) [0x0000000138809b00, 0x000000013880a280] 1920 bytes -------------------------------------------------------------------------------- 0x0000000138809b00: ldr x2, [x12, #16] 0x0000000138809b04: ldrh w2, [x2, #44] 0x0000000138809b08: add x24, x20, x2, uxtx #3 0x0000000138809b0c: sub x24, x24, #0x8 [...] 0x0000000138809fa4: stp x16, x17, [sp, #128] 0x0000000138809fa8: stp x18, x19, [sp, #144] 0x0000000138809fac: stp x20, x21, [sp, #160] [...] 0x0000000138809fc0: stp x30, xzr, [sp, #240] 0x0000000138809fc4: mov x0, x28 ;; 0x10864ACCC 0x0000000138809fc8: mov x9, #0xaccc // #44236 0x0000000138809fcc: movk x9, #0x864, lsl #16 0x0000000138809fd0: movk x9, #0x1, lsl #32 0x0000000138809fd4: blr x9 0x0000000138809fd8: ldp x2, x3, [sp, #16] [...] 0x0000000138809ff4: ldp x16, x17, [sp, #128] 0x0000000138809ff8: ldp x18, x19, [sp, #144] 0x0000000138809ffc: ldp x20, x21, [sp, #160] ``` After: ``` ---------------------------------------------------------------------- method entry point (kind = native) [0x0000000108e4db00, 0x0000000108e4e280] 1920 bytes -------------------------------------------------------------------------------- 0x0000000108e4db00: ldr x2, [x12, #16] 0x0000000108e4db04: ldrh w2, [x2, #44] 0x0000000108e4db08: add x24, x20, x2, uxtx #3 0x0000000108e4db0c: sub x24, x24, #0x8 [...] 0x0000000108e4dfa4: stp x16, x17, [sp, #128] 0x0000000108e4dfa8: stp x19, x20, [sp, #144] 0x0000000108e4dfac: stp x21, x22, [sp, #160] [...] 0x0000000108e4dfbc: stp x29, x30, [sp, #224] 0x0000000108e4dfc0: mov x0, x28 ;; 0x107E4A06C 0x0000000108e4dfc4: mov x9, #0xa06c // #41068 0x0000000108e4dfc8: movk x9, #0x7e4, lsl #16 0x0000000108e4dfcc: movk x9, #0x1, lsl #32 0x0000000108e4dfd0: blr x9 0x0000000108e4dfd4: ldp x2, x3, [sp, #16] [...] 0x0000000108e4dff0: ldp x16, x17, [sp, #128] 0x0000000108e4dff4: ldp x19, x20, [sp, #144] 0x0000000108e4dff8: ldp x21, x22, [sp, #160] [...] ``` --- src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp | 11 +++++++++-- src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index de228bfeb6f9c..532fde30a5c23 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -1855,13 +1855,20 @@ void MacroAssembler::increment(Address dst, int value) str(rscratch1, dst); } +RegSet MacroAssembler::save_all_registers() { + RegSet regs = RegSet::range(r0, r30); +#ifdef R18_RESERVED + regs -= r18_tls; +#endif + return regs; +} void MacroAssembler::pusha() { - push(0x7fffffff, sp); + push(save_all_registers(), sp); } void MacroAssembler::popa() { - pop(0x7fffffff, sp); + pop(save_all_registers(), sp); } // Push lots of registers in the bit set supplied. Don't push sp. diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index dae0a20665636..c052b8d07b357 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -464,6 +464,7 @@ class MacroAssembler: public Assembler { void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } + static RegSet save_all_registers(); static RegSet call_clobbered_registers(); // Push and pop everything that might be clobbered by a native From 84c5c902cbfef8a24a3e2b0123596e2ef125b07f Mon Sep 17 00:00:00 2001 From: Bernhard Urban-Forster Date: Wed, 6 Oct 2021 10:58:22 +0200 Subject: [PATCH 2/5] remove pusha/popa, be explicit in the template generator Restore looks like this now: ``` 0x0000000106e4dfcc: movk x9, #0x5e4, lsl #16 0x0000000106e4dfd0: movk x9, #0x1, lsl #32 0x0000000106e4dfd4: blr x9 0x0000000106e4dfd8: ldp x2, x3, [sp, #16] 0x0000000106e4dfdc: ldp x4, x5, [sp, #32] 0x0000000106e4dfe0: ldp x6, x7, [sp, #48] 0x0000000106e4dfe4: ldp x8, x9, [sp, #64] 0x0000000106e4dfe8: ldp x10, x11, [sp, #80] 0x0000000106e4dfec: ldp x12, x13, [sp, #96] 0x0000000106e4dff0: ldp x14, x15, [sp, #112] 0x0000000106e4dff4: ldp x16, x17, [sp, #128] 0x0000000106e4dff8: ldp x0, x1, [sp], #144 0x0000000106e4dffc: ldp xzr, x19, [sp], #16 0x0000000106e4e000: ldp x22, x23, [sp, #16] 0x0000000106e4e004: ldp x24, x25, [sp, #32] 0x0000000106e4e008: ldp x26, x27, [sp, #48] 0x0000000106e4e00c: ldp x28, x29, [sp, #64] 0x0000000106e4e010: ldp x30, xzr, [sp, #80] 0x0000000106e4e014: ldp x20, x21, [sp], #96 0x0000000106e4e018: ldur x12, [x29, #-24] 0x0000000106e4e01c: ldr x22, [x12, #16] 0x0000000106e4e020: add x22, x22, #0x30 0x0000000106e4e024: ldr x8, [x28, #8] ``` --- .../cpu/aarch64/macroAssembler_aarch64.cpp | 16 ---------------- .../cpu/aarch64/macroAssembler_aarch64.hpp | 5 ----- .../templateInterpreterGenerator_aarch64.cpp | 12 ++++++++++-- 3 files changed, 10 insertions(+), 23 deletions(-) diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 532fde30a5c23..28a13ff3c0b0d 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -1855,22 +1855,6 @@ void MacroAssembler::increment(Address dst, int value) str(rscratch1, dst); } -RegSet MacroAssembler::save_all_registers() { - RegSet regs = RegSet::range(r0, r30); -#ifdef R18_RESERVED - regs -= r18_tls; -#endif - return regs; -} - -void MacroAssembler::pusha() { - push(save_all_registers(), sp); -} - -void MacroAssembler::popa() { - pop(save_all_registers(), sp); -} - // Push lots of registers in the bit set supplied. Don't push sp. // Return the number of words pushed int MacroAssembler::push(unsigned int bitset, Register stack) { diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index c052b8d07b357..3287f153ab8f8 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -464,7 +464,6 @@ class MacroAssembler: public Assembler { void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } - static RegSet save_all_registers(); static RegSet call_clobbered_registers(); // Push and pop everything that might be clobbered by a native @@ -1107,10 +1106,6 @@ class MacroAssembler: public Assembler { void push(Register src); void pop(Register dst); - // push all registers onto the stack - void pusha(); - void popa(); - void repne_scan(Register addr, Register value, Register count, Register scratch); void repne_scanw(Register addr, Register value, Register count, diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp index db253fe5c2cd0..d069345b9c048 100644 --- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp @@ -1397,11 +1397,19 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ cmp(rscratch1, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); __ br(Assembler::NE, no_reguard); - __ pusha(); // XXX only save smashed registers + __ push(RegSet::range(r0, r30), sp); __ mov(c_rarg0, rthread); __ mov(rscratch2, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); __ blr(rscratch2); - __ popa(); // XXX only restore smashed registers + + __ pop(RegSet::range(r0, r17), sp); +#ifdef R18_RESERVED + __ ldp(zr, r19, Address(__ post(sp, 2 * wordSize))); + __ pop(RegSet::range(r20, r30), sp); +#else + __ pop(RegSet::range(r18, r30), sp); +#endif + __ bind(no_reguard); } From ba55aa0e707ea348ce2699d234fc040a5a03db1b Mon Sep 17 00:00:00 2001 From: Bernhard Urban-Forster Date: Thu, 7 Oct 2021 09:33:00 +0200 Subject: [PATCH 3/5] fix linux/aarch64 build: s/r18/r18_tls --- .../cpu/aarch64/templateInterpreterGenerator_aarch64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp index d069345b9c048..27a6e084ebb9d 100644 --- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp @@ -1407,7 +1407,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ ldp(zr, r19, Address(__ post(sp, 2 * wordSize))); __ pop(RegSet::range(r20, r30), sp); #else - __ pop(RegSet::range(r18, r30), sp); + __ pop(RegSet::range(r18_tls, r30), sp); #endif __ bind(no_reguard); From d7ca7b3904bddec9e58e95201005503af30ad548 Mon Sep 17 00:00:00 2001 From: Bernhard Urban-Forster Date: Tue, 12 Oct 2021 14:34:49 +0200 Subject: [PATCH 4/5] use {push,pop}_CPU_state and fix r18 usage there as well --- src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp | 11 +++++++++-- .../aarch64/templateInterpreterGenerator_aarch64.cpp | 11 ++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 28a13ff3c0b0d..5c9b1fc327d09 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -2496,7 +2496,7 @@ void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { void MacroAssembler::push_CPU_state(bool save_vectors, bool use_sve, int sve_vector_size_in_bytes) { - push(0x3fffffff, sp); // integer registers except lr & sp + push(RegSet::range(r0, r29), sp); // integer registers except lr & sp if (save_vectors && use_sve && sve_vector_size_in_bytes > 16) { sub(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers); for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) { @@ -2534,7 +2534,14 @@ void MacroAssembler::pop_CPU_state(bool restore_vectors, bool use_sve, reinitialize_ptrue(); } - pop(0x3fffffff, sp); // integer registers except lr & sp + // integer registers except lr & sp + pop(RegSet::range(r0, r17), sp); +#ifdef R18_RESERVED + ldp(zr, r19, Address(post(sp, 2 * wordSize))); + pop(RegSet::range(r20, r29), sp); +#else + pop(RegSet::range(r18_tls, r29), sp); +#endif } /** diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp index 27a6e084ebb9d..28bc92986fc76 100644 --- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp @@ -1397,18 +1397,11 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ cmp(rscratch1, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); __ br(Assembler::NE, no_reguard); - __ push(RegSet::range(r0, r30), sp); + __ push_CPU_state(); __ mov(c_rarg0, rthread); __ mov(rscratch2, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); __ blr(rscratch2); - - __ pop(RegSet::range(r0, r17), sp); -#ifdef R18_RESERVED - __ ldp(zr, r19, Address(__ post(sp, 2 * wordSize))); - __ pop(RegSet::range(r20, r30), sp); -#else - __ pop(RegSet::range(r18_tls, r30), sp); -#endif + __ pop_CPU_state(); __ bind(no_reguard); } From 0bf88e8be81135f1ab081a82e218cacaf757f895 Mon Sep 17 00:00:00 2001 From: Bernhard Urban-Forster Date: Wed, 13 Oct 2021 10:58:03 +0200 Subject: [PATCH 5/5] Apply suggestions from code review Co-authored-by: Andrew Haley --- .../cpu/aarch64/templateInterpreterGenerator_aarch64.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp index 28bc92986fc76..e20cffd57670b 100644 --- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp @@ -1397,11 +1397,11 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ cmp(rscratch1, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); __ br(Assembler::NE, no_reguard); - __ push_CPU_state(); + __ push_call_clobbered_registers(); __ mov(c_rarg0, rthread); __ mov(rscratch2, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); __ blr(rscratch2); - __ pop_CPU_state(); + __ pop_call_clobbered_registers(); __ bind(no_reguard); }