@@ -698,6 +698,7 @@ void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, in
698698// trampolines won't be emitted.
699699
700700address MacroAssembler::trampoline_call (Address entry, CodeBuffer *cbuf) {
701+ assert (JavaThread::current ()->is_Compiler_thread (), " just checking" );
701702 assert (entry.rspec ().type () == relocInfo::runtime_call_type
702703 || entry.rspec ().type () == relocInfo::opt_virtual_call_type
703704 || entry.rspec ().type () == relocInfo::static_call_type
@@ -4944,34 +4945,67 @@ void MacroAssembler::arrays_equals(Register a1, Register a2,
49444945}
49454946
49464947
4947- // base: Address of a buffer to be zeroed, 8 bytes aligned.
4948- // cnt: Count in HeapWords.
4949- // is_large: True when 'cnt' is known to be >= BlockZeroingLowLimit.
4950- void MacroAssembler::zero_words (Register base, Register cnt)
4948+ // The size of the blocks erased by the zero_blocks stub. We must
4949+ // handle anything smaller than this ourselves in zero_words().
4950+ const int MacroAssembler::zero_words_block_size = 8 ;
4951+
4952+ // zero_words() is used by C2 ClearArray patterns. It is as small as
4953+ // possible, handling small word counts locally and delegating
4954+ // anything larger to the zero_blocks stub. It is expanded many times
4955+ // in compiled code, so it is important to keep it short.
4956+
4957+ // ptr: Address of a buffer to be zeroed.
4958+ // cnt: Count in HeapWords.
4959+ //
4960+ // ptr, cnt, rscratch1, and rscratch2 are clobbered.
4961+ void MacroAssembler::zero_words (Register ptr, Register cnt)
49514962{
4952- if (UseBlockZeroing) {
4953- block_zero (base, cnt);
4954- } else {
4955- fill_words (base, cnt, zr);
4963+ assert (is_power_of_2 (zero_words_block_size), " adjust this" ); // the tail below tests one bit of cnt per power of two
4964+ assert (ptr == r10 && cnt == r11, " mismatch in register usage" ); // fixed register contract: callers must pass r10 and r11
4965+
4966+ BLOCK_COMMENT (" zero_words {" );
4967+ cmp (cnt, zero_words_block_size);
4968+ Label around, done, done16; // NOTE(review): 'done' and 'done16' are never bound or branched to in this function as shown
4969+ br (LO, around); // unsigned lower: counts below one block skip the stub call
4970+ {
4971+ RuntimeAddress zero_blocks = RuntimeAddress (StubRoutines::aarch64::zero_blocks ());
4972+ assert (zero_blocks.target () != NULL , " zero_blocks stub has not been generated" );
4973+ if (StubRoutines::aarch64::complete ()) { // trampoline_call once stub generation is complete, plain bl otherwise
4974+ trampoline_call (zero_blocks);
4975+ } else {
4976+ bl (zero_blocks);
4977+ }
4978+ }
4979+ bind (around); // NOTE(review): also reached after the stub returns; presumably the stub leaves ptr advanced and the residual count in cnt's low bits - confirm against the stub
4980+ for (int i = zero_words_block_size >> 1 ; i > 1 ; i >>= 1 ) { // i = 4, then 2 for a block size of 8
4981+ Label l;
4982+ tbz (cnt, exact_log2 (i), l); // skip this chunk when bit log2(i) of cnt is clear
4983+ for (int j = 0 ; j < i; j += 2 ) { // emits i / 2 store-pair instructions
4984+ stp (zr, zr, post (ptr, 16 )); // zero 16 bytes and post-increment ptr by 16
4985+ }
4986+ bind (l);
4987+ }
4988+ {
4989+ Label l;
4990+ tbz (cnt, 0 , l); // skip when cnt is even
4991+ str (zr, Address (ptr)); // zero the final odd word; ptr is not advanced here
4992+ bind (l);
49564993 }
4994+ BLOCK_COMMENT (" } zero_words" );
49574995}
49584996
4959- // r10 = base: Address of a buffer to be zeroed, 8 bytes aligned.
4997+ // base: Address of a buffer to be zeroed, 8 bytes aligned.
49604998// cnt: Immediate count in HeapWords.
4961- // r11 = tmp: For use as cnt if we need to call out
4962- #define ShortArraySize (18 * BytesPerLong)
4999+ #define SmallArraySize (18 * BytesPerLong)
49635000void MacroAssembler::zero_words (Register base, u_int64_t cnt)
49645001{
4965- Register tmp = r11 ;
5002+ BLOCK_COMMENT ( " zero_words { " ) ;
49665003 int i = cnt & 1 ; // store any odd word to start
49675004 if (i) str (zr, Address (base));
49685005
4969- if (cnt <= ShortArraySize / BytesPerLong) {
5006+ if (cnt <= SmallArraySize / BytesPerLong) {
49705007 for (; i < (int )cnt; i += 2 )
49715008 stp (zr, zr, Address (base, i * wordSize));
4972- } else if (UseBlockZeroing && cnt >= (u_int64_t )(BlockZeroingLowLimit >> LogBytesPerWord)) {
4973- mov (tmp, cnt);
4974- block_zero (base, tmp, true );
49755009 } else {
49765010 const int unroll = 4 ; // Number of stp(zr, zr) instructions we'll unroll
49775011 int remainder = cnt % (2 * unroll);
@@ -4992,6 +5026,51 @@ void MacroAssembler::zero_words(Register base, u_int64_t cnt)
49925026 stp (zr, zr, Address (pre (loop_base, 2 * unroll * wordSize)));
49935027 cbnz (cnt_reg, loop);
49945028 }
5029+ BLOCK_COMMENT (" } zero_words" );
5030+ }
5031+
5032+ // Zero blocks of memory by using DC ZVA.
5033+ //
5034+ // Aligns the base address first sufficiently for DC ZVA, then uses
5035+ // DC ZVA repeatedly for every full block. cnt is the size to be
5036+ // zeroed in HeapWords. Returns the count of words left to be zeroed
5037+ // in cnt. base is advanced past the zeroed region; rscratch1 and rscratch2 are used as temporaries.
5038+ //
5039+ // NOTE: This is intended to be used in the zero_blocks() stub. If
5040+ // you want to use it elsewhere, note that cnt must be >= 2*zva_length. (NOTE(review): cnt is in words while zva_length is used as a byte count below - confirm the intended unit of this bound.)
5041+ void MacroAssembler::zero_dcache_blocks (Register base, Register cnt) {
5042+ Register tmp = rscratch1;
5043+ Register tmp2 = rscratch2;
5044+ int zva_length = VM_Version::zva_length ();
5045+ Label initial_table_end, loop_zva;
5046+ Label fini;
5047+
5048+ // Base must be 16-byte aligned. If not, just return and let the caller handle it.
5049+ tst (base, 0x0f );
5050+ br (Assembler::NE, fini); // base and cnt are left unchanged on this path
5051+ // Align base with ZVA length.
5052+ neg (tmp, base);
5053+ andr (tmp, tmp, zva_length - 1 ); // tmp = (-base) & (zva_length - 1)
5054+
5055+ // tmp: the number of bytes to be filled to align the base with ZVA length.
5056+ add (base, base, tmp);
5057+ sub (cnt, cnt, tmp, Assembler::ASR, 3 ); // cnt -= tmp / 8 (bytes converted to words)
5058+ adr (tmp2, initial_table_end);
5059+ sub (tmp2, tmp2, tmp, Assembler::LSR, 2 ); // each stp is one 4-byte instruction zeroing 16 bytes, so back up tmp / 4 bytes of code
5060+ br (tmp2); // computed jump: runs only the last tmp / 16 stores of the table below
5061+
5062+ for (int i = -zva_length + 16 ; i < 0 ; i += 16 ) // zva_length / 16 - 1 stores at negative offsets from the already-advanced base
5063+ stp (zr, zr, Address (base, i));
5064+ bind (initial_table_end);
5065+
5066+ sub (cnt, cnt, zva_length >> 3 ); // pre-bias cnt by one block (in words) so the loop can test the sign after subs
5067+ bind (loop_zva);
5068+ dc (Assembler::ZVA, base); // the first DC ZVA runs unconditionally, so at least one full block must remain here
5069+ subs (cnt, cnt, zva_length >> 3 );
5070+ add (base, base, zva_length); // plain add: leaves the flags from subs intact
5071+ br (Assembler::GE, loop_zva); // continue while at least one more full block remains
5072+ add (cnt, cnt, zva_length >> 3 ); // count not zeroed by DC ZVA
5073+ bind (fini);
49955074}
49965075
49975076// base: Address of a buffer to be filled, 8 bytes aligned.
@@ -5052,69 +5131,6 @@ void MacroAssembler::fill_words(Register base, Register cnt, Register value)
50525131 bind (fini);
50535132}
50545133
5055- // Use DC ZVA to do fast zeroing.
5056- // base: Address of a buffer to be zeroed, 8 bytes aligned.
5057- // cnt: Count in HeapWords.
5058- // is_large: True when 'cnt' is known to be >= BlockZeroingLowLimit.
5059- void MacroAssembler::block_zero (Register base, Register cnt, bool is_large)
5060- {
5061- Label small;
5062- Label store_pair, loop_store_pair, done;
5063- Label base_aligned;
5064-
5065- assert_different_registers (base, cnt, rscratch1);
5066- guarantee (base == r10 && cnt == r11, " fix register usage" );
5067-
5068- Register tmp = rscratch1;
5069- Register tmp2 = rscratch2;
5070- int zva_length = VM_Version::zva_length ();
5071-
5072- // Ensure ZVA length can be divided by 16. This is required by
5073- // the subsequent operations.
5074- assert (zva_length % 16 == 0 , " Unexpected ZVA Length" );
5075-
5076- if (!is_large) cbz (cnt, done);
5077- tbz (base, 3 , base_aligned);
5078- str (zr, Address (post (base, 8 )));
5079- sub (cnt, cnt, 1 );
5080- bind (base_aligned);
5081-
5082- // Ensure count >= zva_length * 2 so that it still deserves a zva after
5083- // alignment.
5084- if (!is_large || !(BlockZeroingLowLimit >= zva_length * 2 )) {
5085- int low_limit = MAX2 (zva_length * 2 , (int )BlockZeroingLowLimit);
5086- subs (tmp, cnt, low_limit >> 3 );
5087- br (Assembler::LT, small);
5088- }
5089-
5090- far_call (StubRoutines::aarch64::get_zero_longs ());
5091-
5092- bind (small);
5093-
5094- const int unroll = 8 ; // Number of stp instructions we'll unroll
5095- Label small_loop, small_table_end;
5096-
5097- andr (tmp, cnt, (unroll-1 ) * 2 );
5098- sub (cnt, cnt, tmp);
5099- add (base, base, tmp, Assembler::LSL, 3 );
5100- adr (tmp2, small_table_end);
5101- sub (tmp2, tmp2, tmp, Assembler::LSL, 1 );
5102- br (tmp2);
5103-
5104- bind (small_loop);
5105- add (base, base, unroll * 16 );
5106- for (int i = -unroll; i < 0 ; i++)
5107- stp (zr, zr, Address (base, i * 16 ));
5108- bind (small_table_end);
5109- subs (cnt, cnt, unroll * 2 );
5110- br (Assembler::GE, small_loop);
5111-
5112- tbz (cnt, 0 , done);
5113- str (zr, Address (post (base, 8 )));
5114-
5115- bind (done);
5116- }
5117-
51185134// Intrinsic for sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray and
51195135// java/lang/StringUTF16.compress.
51205136void MacroAssembler::encode_iso_array (Register src, Register dst,
0 commit comments