From f56b8e84d95296ba822cca80d3495980daa37dfe Mon Sep 17 00:00:00 2001 From: Richard Reingruber Date: Wed, 8 May 2024 15:50:03 +0200 Subject: [PATCH 1/9] 8331311: C2: Big Endian Port of 8318446: optimize stores into primitive arrays by combining values into larger store --- src/hotspot/share/opto/memnode.cpp | 31 +- .../jtreg/compiler/c2/TestMergeStores.java | 715 +++++++++++++----- .../ir_framework/test/IREncodingPrinter.java | 10 +- 3 files changed, 564 insertions(+), 192 deletions(-) diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index fa26825e19f50..8655a544dbf23 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -2881,14 +2881,14 @@ class ArrayPointer { // // RangeCheck[i+0] RangeCheck[i+0] // StoreB[i+0] -// RangeCheck[i+1] RangeCheck[i+1] +// RangeCheck[i+3] RangeCheck[i+3] // StoreB[i+1] --> pass: fail: // StoreB[i+2] StoreI[i+0] StoreB[i+0] // StoreB[i+3] // // The 4 StoreB are merged into a single StoreI node. We have to be careful with RangeCheck[i+1]: before // the optimization, if this RangeCheck[i+1] fails, then we execute only StoreB[i+0], and then trap. After -// the optimization, the new StoreI[i+0] is on the passing path of RangeCheck[i+1], and StoreB[i+0] on the +// the optimization, the new StoreI[i+0] is on the passing path of RangeCheck[i+3], and StoreB[i+0] on the // failing path. // // Note: For normal array stores, every store at first has a RangeCheck. 
But they can be removed with: @@ -2900,11 +2900,11 @@ class ArrayPointer { // RangeCheck[i+0] RangeCheck[i+0] <- before first store // StoreB[i+0] StoreB[i+0] <- first store // RangeCheck[i+1] --> smeared --> RangeCheck[i+3] <- only RC between first and last store -// StoreB[i+0] StoreB[i+1] <- second store +// StoreB[i+1] StoreB[i+1] <- second store // RangeCheck[i+2] --> removed -// StoreB[i+0] StoreB[i+2] +// StoreB[i+2] StoreB[i+2] // RangeCheck[i+3] --> removed -// StoreB[i+0] StoreB[i+3] <- last store +// StoreB[i+3] StoreB[i+3] <- last store // // Thus, it is a common pattern that between the first and last store in a chain // of adjacent stores there remains exactly one RangeCheck, located between the @@ -3067,6 +3067,11 @@ bool MergePrimitiveArrayStores::is_adjacent_input_pair(const Node* n1, const Nod } // Pattern: [n1 = base >> shift, n2 = base >> (shift + memory_size)] +#ifndef VM_LITTLE_ENDIAN + // Pattern: [n1 = base >> (shift + memory_size), n2 = base >> shift] + // Swapping n1 with n2 gives same pattern as on little endian platforms. 
+ swap(n1, n2); +#endif // !VM_LITTLE_ENDIAN Node const* base_n2; jint shift_n2; if (!is_con_RShift(n2, base_n2, shift_n2)) { @@ -3281,8 +3286,13 @@ Node* MergePrimitiveArrayStores::make_merged_input_value(const Node_List& merge_ jlong mask = (((jlong)1) << bits_per_store) - 1; for (uint i = 0; i < merge_list.size(); i++) { jlong con_i = merge_list.at(i)->in(MemNode::ValueIn)->get_int(); +#ifdef VM_LITTLE_ENDIAN con = con << bits_per_store; con = con | (mask & con_i); +#else // VM_LITTLE_ENDIAN + con_i = (mask & con_i) << (i * bits_per_store); + con = con | con_i; +#endif // VM_LITTLE_ENDIAN } merged_input_value = _phase->longcon(con); } else { @@ -3290,10 +3300,17 @@ Node* MergePrimitiveArrayStores::make_merged_input_value(const Node_List& merge_ // | | // _store first // - merged_input_value = first->in(MemNode::ValueIn); Node const* base_last; jint shift_last; +#ifdef VM_LITTLE_ENDIAN + merged_input_value = first->in(MemNode::ValueIn); bool is_true = is_con_RShift(_store->in(MemNode::ValueIn), base_last, shift_last); +#else // VM_LITTLE_ENDIAN + // `_store` points to the lowest using store in the Memory chain. On big endian it stores the + // unshifted `base`. 
`_store` and `first` need to be exchanged in the diagram above + merged_input_value = _store->in(MemNode::ValueIn); + bool is_true = is_con_RShift(first->in(MemNode::ValueIn), base_last, shift_last); +#endif // VM_LITTLE_ENDIAN assert(is_true, "must detect con RShift"); if (merged_input_value != base_last && merged_input_value->Opcode() == Op_ConvL2I) { // look through @@ -3473,7 +3490,6 @@ Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) { } } -#ifdef VM_LITTLE_ENDIAN if (MergeStores && UseUnalignedAccesses) { if (phase->C->post_loop_opts_phase()) { MergePrimitiveArrayStores merge(phase, this); @@ -3483,7 +3499,6 @@ Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) { phase->C->record_for_post_loop_opts_igvn(this); } } -#endif return nullptr; // No further progress } diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java index 555946502196b..378ad8a182f8e 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java @@ -54,6 +54,8 @@ public class TestMergeStores { private static final Unsafe UNSAFE = Unsafe.getUnsafe(); private static final Random RANDOM = Utils.getRandomInstance(); + private static final boolean IS_BIG_ENDIAN = UNSAFE.isBigEndian(); + // Inputs byte[] aB = new byte[RANGE]; byte[] bB = new byte[RANGE]; @@ -112,7 +114,7 @@ public TestMergeStores() { testGroups.get("test2").put("test2b", (_,_) -> { return test2b(aB.clone(), offset1, vL1); }); testGroups.get("test2").put("test2c", (_,_) -> { return test2c(aB.clone(), offset1, vL1); }); testGroups.get("test2").put("test2d", (_,_) -> { return test2d(aB.clone(), offset1, vL1); }); - testGroups.get("test2").put("test2e", (_,_) -> { return test2d(aB.clone(), offset1, vL1); }); + testGroups.get("test2").put("test2e", (_,_) -> { return test2e(aB.clone(), offset1, vL1); }); testGroups.put("test3", new HashMap()); testGroups.get("test3").put("test3R", (_,_) -> { return 
test3R(aB.clone(), offset1, vL1); }); @@ -191,6 +193,10 @@ public TestMergeStores() { testGroups.put("test700", new HashMap()); testGroups.get("test700").put("test700R", (_,i) -> { return test700R(aI.clone(), i); }); testGroups.get("test700").put("test700a", (_,i) -> { return test700a(aI.clone(), i); }); + + testGroups.put("test800", new HashMap()); + testGroups.get("test800").put("test800R", (_,_) -> { return test800R(aB.clone(), offset1, vL1); }); + testGroups.get("test800").put("test800a", (_,_) -> { return test800a(aB.clone(), offset1, vL1); }); } @Warmup(100) @@ -225,7 +231,8 @@ public TestMergeStores() { "test501a", "test502a", "test600a", - "test700a"}) + "test700a", + "test800a"}) public void runTests(RunInfo info) { // Repeat many times, so that we also have multiple iterations for post-warmup to potentially recompile int iters = info.isWarmUp() ? 1_000 : 50_000; @@ -412,6 +419,39 @@ static void storeLongLE(byte[] bytes, int offset, long value) { (byte)(value >> 56)); } + // ------------------------------------------- + // ------- Big-Endian API ---------- + // ------------------------------------------- + + // Store a short BE into an array using store bytes in an array + @ForceInline + static void storeShortBE(byte[] bytes, int offset, short value) { + storeBytes(bytes, offset, (byte)(value >> 8), + (byte)(value >> 0)); + } + + // Store an int BE into an array using store bytes in an array + @ForceInline + static void storeIntBE(byte[] bytes, int offset, int value) { + storeBytes(bytes, offset, (byte)(value >> 24), + (byte)(value >> 16), + (byte)(value >> 8 ), + (byte)(value >> 0 )); + } + + // Store a long BE into an array using store bytes in an array + @ForceInline + static void storeLongBE(byte[] bytes, int offset, long value) { + storeBytes(bytes, offset, (byte)(value >> 56), + (byte)(value >> 48), + (byte)(value >> 40), + (byte)(value >> 32), + (byte)(value >> 24), + (byte)(value >> 16), + (byte)(value >> 8 ), + (byte)(value >> 0 )); + } + // Store 
2 bytes into an array @ForceInline static void storeBytes(byte[] bytes, int offset, byte b0, byte b1) { @@ -476,7 +516,7 @@ static Object[] test1a(byte[] a) { static Object[] test1b(byte[] a) { // Add custom null check, to ensure the unsafe access always recognizes its type as an array store if (a == null) {return null;} - UNSAFE.putLongUnaligned(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET, 0xdeadbeefbaadbabeL); + UNSAFE.putLongUnaligned(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET, 0xdeadbeefbaadbabeL, false /* bigEndian */); return new Object[]{ a }; } @@ -576,14 +616,25 @@ static Object[] test1i(byte[] a) { @DontCompile static Object[] test2R(byte[] a, int offset, long v) { - a[offset + 0] = (byte)(v >> 0); - a[offset + 1] = (byte)(v >> 8); - a[offset + 2] = (byte)(v >> 16); - a[offset + 3] = (byte)(v >> 24); - a[offset + 4] = (byte)(v >> 32); - a[offset + 5] = (byte)(v >> 40); - a[offset + 6] = (byte)(v >> 48); - a[offset + 7] = (byte)(v >> 56); + if (IS_BIG_ENDIAN) { + a[offset + 0] = (byte)(v >> 56); + a[offset + 1] = (byte)(v >> 48); + a[offset + 2] = (byte)(v >> 40); + a[offset + 3] = (byte)(v >> 32); + a[offset + 4] = (byte)(v >> 24); + a[offset + 5] = (byte)(v >> 16); + a[offset + 6] = (byte)(v >> 8); + a[offset + 7] = (byte)(v >> 0); + } else { + a[offset + 0] = (byte)(v >> 0); + a[offset + 1] = (byte)(v >> 8); + a[offset + 2] = (byte)(v >> 16); + a[offset + 3] = (byte)(v >> 24); + a[offset + 4] = (byte)(v >> 32); + a[offset + 5] = (byte)(v >> 40); + a[offset + 6] = (byte)(v >> 48); + a[offset + 7] = (byte)(v >> 56); + } return new Object[]{ a }; } @@ -591,14 +642,25 @@ static Object[] test2R(byte[] a, int offset, long v) { @IR(counts = {IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, applyIf = {"UseUnalignedAccesses", "true"}) static Object[] test2a(byte[] a, int offset, long v) { - a[offset + 0] = (byte)(v >> 0); - a[offset + 1] = (byte)(v >> 8); - a[offset + 2] = (byte)(v >> 16); - a[offset + 3] = (byte)(v >> 24); - 
a[offset + 4] = (byte)(v >> 32); - a[offset + 5] = (byte)(v >> 40); - a[offset + 6] = (byte)(v >> 48); - a[offset + 7] = (byte)(v >> 56); + if (IS_BIG_ENDIAN) { + a[offset + 0] = (byte)(v >> 56); + a[offset + 1] = (byte)(v >> 48); + a[offset + 2] = (byte)(v >> 40); + a[offset + 3] = (byte)(v >> 32); + a[offset + 4] = (byte)(v >> 24); + a[offset + 5] = (byte)(v >> 16); + a[offset + 6] = (byte)(v >> 8); + a[offset + 7] = (byte)(v >> 0); + } else { + a[offset + 0] = (byte)(v >> 0); + a[offset + 1] = (byte)(v >> 8); + a[offset + 2] = (byte)(v >> 16); + a[offset + 3] = (byte)(v >> 24); + a[offset + 4] = (byte)(v >> 32); + a[offset + 5] = (byte)(v >> 40); + a[offset + 6] = (byte)(v >> 48); + a[offset + 7] = (byte)(v >> 56); + } return new Object[]{ a }; } @@ -616,38 +678,65 @@ static Object[] test2b(byte[] a, int offset, long v) { @IR(counts = {IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, applyIf = {"UseUnalignedAccesses", "true"}) static Object[] test2c(byte[] a, int offset, long v) { - storeLongLE(a, offset, v); + if (IS_BIG_ENDIAN) { + storeLongBE(a, offset, v); + } else { + storeLongLE(a, offset, v); + } return new Object[]{ a }; } @Test // No optimization, casting long -> int -> byte does not work static Object[] test2d(byte[] a, int offset, long v) { - storeIntLE(a, offset + 0, (int)(v >> 0)); - storeIntLE(a, offset + 4, (int)(v >> 32)); + if (IS_BIG_ENDIAN) { + storeIntBE(a, offset + 0, (int)(v >> 32)); + storeIntBE(a, offset + 4, (int)(v >> 0)); + } else { + storeIntLE(a, offset + 0, (int)(v >> 0)); + storeIntLE(a, offset + 4, (int)(v >> 32)); + } return new Object[]{ a }; } @Test // No optimization, casting long -> short -> byte does not work static Object[] test2e(byte[] a, int offset, long v) { - storeShortLE(a, offset + 0, (short)(v >> 0)); - storeShortLE(a, offset + 2, (short)(v >> 16)); - storeShortLE(a, offset + 4, (short)(v >> 32)); - storeShortLE(a, offset + 6, (short)(v >> 48)); + if 
(IS_BIG_ENDIAN) { + storeShortBE(a, offset + 0, (short)(v >> 48)); + storeShortBE(a, offset + 2, (short)(v >> 32)); + storeShortBE(a, offset + 4, (short)(v >> 16)); + storeShortBE(a, offset + 6, (short)(v >> 0)); + } else { + storeShortLE(a, offset + 0, (short)(v >> 0)); + storeShortLE(a, offset + 2, (short)(v >> 16)); + storeShortLE(a, offset + 4, (short)(v >> 32)); + storeShortLE(a, offset + 6, (short)(v >> 48)); + } return new Object[]{ a }; } @DontCompile static Object[] test3R(byte[] a, int offset, long v) { - a[offset + 0] = (byte)(v >> 0); - a[offset + 1] = (byte)(v >> 8); - a[offset + 2] = (byte)(v >> 16); - a[offset + 3] = (byte)(v >> 24); - a[offset + 4] = (byte)(v >> 0); - a[offset + 5] = (byte)(v >> 8); - a[offset + 6] = (byte)(v >> 16); - a[offset + 7] = (byte)(v >> 24); + if (IS_BIG_ENDIAN) { + a[offset + 0] = (byte)(v >> 24); + a[offset + 1] = (byte)(v >> 16); + a[offset + 2] = (byte)(v >> 8); + a[offset + 3] = (byte)(v >> 0); + a[offset + 4] = (byte)(v >> 24); + a[offset + 5] = (byte)(v >> 16); + a[offset + 6] = (byte)(v >> 8); + a[offset + 7] = (byte)(v >> 0); + } else { + a[offset + 0] = (byte)(v >> 0); + a[offset + 1] = (byte)(v >> 8); + a[offset + 2] = (byte)(v >> 16); + a[offset + 3] = (byte)(v >> 24); + a[offset + 4] = (byte)(v >> 0); + a[offset + 5] = (byte)(v >> 8); + a[offset + 6] = (byte)(v >> 16); + a[offset + 7] = (byte)(v >> 24); + } return new Object[]{ a }; } @@ -655,14 +744,25 @@ static Object[] test3R(byte[] a, int offset, long v) { @IR(counts = {IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "2"}, applyIf = {"UseUnalignedAccesses", "true"}) static Object[] test3a(byte[] a, int offset, long v) { - a[offset + 0] = (byte)(v >> 0); - a[offset + 1] = (byte)(v >> 8); - a[offset + 2] = (byte)(v >> 16); - a[offset + 3] = (byte)(v >> 24); - a[offset + 4] = (byte)(v >> 0); - a[offset + 5] = (byte)(v >> 8); - a[offset + 6] = (byte)(v >> 16); - a[offset + 7] = (byte)(v >> 24); + if 
(IS_BIG_ENDIAN) { + a[offset + 0] = (byte)(v >> 24); + a[offset + 1] = (byte)(v >> 16); + a[offset + 2] = (byte)(v >> 8); + a[offset + 3] = (byte)(v >> 0); + a[offset + 4] = (byte)(v >> 24); + a[offset + 5] = (byte)(v >> 16); + a[offset + 6] = (byte)(v >> 8); + a[offset + 7] = (byte)(v >> 0); + } else { + a[offset + 0] = (byte)(v >> 0); + a[offset + 1] = (byte)(v >> 8); + a[offset + 2] = (byte)(v >> 16); + a[offset + 3] = (byte)(v >> 24); + a[offset + 4] = (byte)(v >> 0); + a[offset + 5] = (byte)(v >> 8); + a[offset + 6] = (byte)(v >> 16); + a[offset + 7] = (byte)(v >> 24); + } return new Object[]{ a }; } @@ -672,18 +772,32 @@ static Object[] test4R(byte[] a, int offset, long v1, int v2, short v3, byte v4) a[offset + 1] = (byte)0xFF; a[offset + 2] = v4; a[offset + 3] = (byte)0x42; - a[offset + 4] = (byte)(v1 >> 0); - a[offset + 5] = (byte)(v1 >> 8); + if (IS_BIG_ENDIAN) { + a[offset + 4] = (byte)(v1 >> 8); + a[offset + 5] = (byte)(v1 >> 0); + } else { + a[offset + 4] = (byte)(v1 >> 0); + a[offset + 5] = (byte)(v1 >> 8); + } a[offset + 6] = (byte)0xAB; a[offset + 7] = (byte)0xCD; a[offset + 8] = (byte)0xEF; a[offset + 9] = (byte)0x01; - a[offset + 10] = (byte)(v2 >> 0); - a[offset + 11] = (byte)(v2 >> 8); - a[offset + 12] = (byte)(v2 >> 16); - a[offset + 13] = (byte)(v2 >> 24); - a[offset + 14] = (byte)(v3 >> 0); - a[offset + 15] = (byte)(v3 >> 8); + if (IS_BIG_ENDIAN) { + a[offset + 10] = (byte)(v2 >> 24); + a[offset + 11] = (byte)(v2 >> 16); + a[offset + 12] = (byte)(v2 >> 8); + a[offset + 13] = (byte)(v2 >> 0); + a[offset + 14] = (byte)(v3 >> 8); + a[offset + 15] = (byte)(v3 >> 0); + } else { + a[offset + 10] = (byte)(v2 >> 0); + a[offset + 11] = (byte)(v2 >> 8); + a[offset + 12] = (byte)(v2 >> 16); + a[offset + 13] = (byte)(v2 >> 24); + a[offset + 14] = (byte)(v3 >> 0); + a[offset + 15] = (byte)(v3 >> 8); + } a[offset + 16] = (byte)0xEF; return new Object[]{ a }; } @@ -699,18 +813,32 @@ static Object[] test4a(byte[] a, int offset, long v1, int v2, short v3, byte 
v4) a[offset + 1] = (byte)0xFF; a[offset + 2] = v4; a[offset + 3] = (byte)0x42; - a[offset + 4] = (byte)(v1 >> 0); - a[offset + 5] = (byte)(v1 >> 8); + if (IS_BIG_ENDIAN) { + a[offset + 4] = (byte)(v1 >> 8); + a[offset + 5] = (byte)(v1 >> 0); + } else { + a[offset + 4] = (byte)(v1 >> 0); + a[offset + 5] = (byte)(v1 >> 8); + } a[offset + 6] = (byte)0xAB; a[offset + 7] = (byte)0xCD; a[offset + 8] = (byte)0xEF; a[offset + 9] = (byte)0x01; - a[offset + 10] = (byte)(v2 >> 0); - a[offset + 11] = (byte)(v2 >> 8); - a[offset + 12] = (byte)(v2 >> 16); - a[offset + 13] = (byte)(v2 >> 24); - a[offset + 14] = (byte)(v3 >> 0); - a[offset + 15] = (byte)(v3 >> 8); + if (IS_BIG_ENDIAN) { + a[offset + 10] = (byte)(v2 >> 24); + a[offset + 11] = (byte)(v2 >> 16); + a[offset + 12] = (byte)(v2 >> 8); + a[offset + 13] = (byte)(v2 >> 0); + a[offset + 14] = (byte)(v3 >> 8); + a[offset + 15] = (byte)(v3 >> 0); + } else { + a[offset + 10] = (byte)(v2 >> 0); + a[offset + 11] = (byte)(v2 >> 8); + a[offset + 12] = (byte)(v2 >> 16); + a[offset + 13] = (byte)(v2 >> 24); + a[offset + 14] = (byte)(v3 >> 0); + a[offset + 15] = (byte)(v3 >> 8); + } a[offset + 16] = (byte)0xEF; return new Object[]{ a }; } @@ -792,9 +920,15 @@ static Object[] test6a(byte[] a, byte[] b, int offset1, int offset2) { @DontCompile static Object[] test7R(byte[] a, int offset1, int v1) { - a[offset1 + 1] = (byte)(v1 >> 8); - a[offset1 + 2] = (byte)(v1 >> 16); - a[offset1 + 3] = (byte)(v1 >> 24); + if (IS_BIG_ENDIAN) { + a[offset1 + 1] = (byte)(v1 >> 24); + a[offset1 + 2] = (byte)(v1 >> 16); + a[offset1 + 3] = (byte)(v1 >> 8); + } else { + a[offset1 + 1] = (byte)(v1 >> 8); + a[offset1 + 2] = (byte)(v1 >> 16); + a[offset1 + 3] = (byte)(v1 >> 24); + } return new Object[]{ a }; } @@ -804,9 +938,15 @@ static Object[] test7R(byte[] a, int offset1, int v1) { IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] 
\\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}) static Object[] test7a(byte[] a, int offset1, int v1) { - a[offset1 + 1] = (byte)(v1 >> 8); - a[offset1 + 2] = (byte)(v1 >> 16); - a[offset1 + 3] = (byte)(v1 >> 24); + if (IS_BIG_ENDIAN) { + a[offset1 + 1] = (byte)(v1 >> 24); + a[offset1 + 2] = (byte)(v1 >> 16); + a[offset1 + 3] = (byte)(v1 >> 8); + } else { + a[offset1 + 1] = (byte)(v1 >> 8); + a[offset1 + 2] = (byte)(v1 >> 16); + a[offset1 + 3] = (byte)(v1 >> 24); + } return new Object[]{ a }; } @@ -904,18 +1044,33 @@ static Object[] test102R(short[] a, int offset, long v1, int v2, short v3) { a[offset + 1] = (short)0xFFFF; a[offset + 2] = v3; a[offset + 3] = (short)0x4242; - a[offset + 4] = (short)(v1 >> 0); - a[offset + 5] = (short)(v1 >> 16); - a[offset + 6] = (short)0xAB11; - a[offset + 7] = (short)0xCD36; - a[offset + 8] = (short)0xEF89; - a[offset + 9] = (short)0x0156; - a[offset + 10] = (short)(v1 >> 0); - a[offset + 11] = (short)(v1 >> 16); - a[offset + 12] = (short)(v1 >> 32); - a[offset + 13] = (short)(v1 >> 48); - a[offset + 14] = (short)(v2 >> 0); - a[offset + 15] = (short)(v2 >> 16); + if (IS_BIG_ENDIAN) { + a[offset + 4] = (short)(v1 >> 16); + a[offset + 5] = (short)(v1 >> 0); + a[offset + 6] = (short)0xAB11; + a[offset + 7] = (short)0xCD36; + a[offset + 8] = (short)0xEF89; + a[offset + 9] = (short)0x0156; + a[offset + 10] = (short)(v1 >> 48); + a[offset + 11] = (short)(v1 >> 32); + a[offset + 12] = (short)(v1 >> 16); + a[offset + 13] = (short)(v1 >> 0); + a[offset + 14] = (short)(v2 >> 16); + a[offset + 15] = (short)(v2 >> 0); + } else { + a[offset + 4] = (short)(v1 >> 0); + a[offset + 5] = (short)(v1 >> 16); + a[offset + 6] = (short)0xAB11; + a[offset + 7] = (short)0xCD36; + a[offset + 8] = (short)0xEF89; + a[offset + 9] = (short)0x0156; + a[offset + 10] = (short)(v1 >> 0); + a[offset + 11] = (short)(v1 >> 16); + a[offset + 12] = (short)(v1 >> 32); + a[offset + 13] = (short)(v1 >> 48); + a[offset + 14] = (short)(v2 >> 0); + a[offset + 15] 
= (short)(v2 >> 16); + } a[offset + 16] = (short)0xEFEF; return new Object[]{ a }; } @@ -931,18 +1086,33 @@ static Object[] test102a(short[] a, int offset, long v1, int v2, short v3) { a[offset + 1] = (short)0xFFFF; a[offset + 2] = v3; a[offset + 3] = (short)0x4242; - a[offset + 4] = (short)(v1 >> 0); - a[offset + 5] = (short)(v1 >> 16); - a[offset + 6] = (short)0xAB11; - a[offset + 7] = (short)0xCD36; - a[offset + 8] = (short)0xEF89; - a[offset + 9] = (short)0x0156; - a[offset + 10] = (short)(v1 >> 0); - a[offset + 11] = (short)(v1 >> 16); - a[offset + 12] = (short)(v1 >> 32); - a[offset + 13] = (short)(v1 >> 48); - a[offset + 14] = (short)(v2 >> 0); - a[offset + 15] = (short)(v2 >> 16); + if (IS_BIG_ENDIAN) { + a[offset + 4] = (short)(v1 >> 16); + a[offset + 5] = (short)(v1 >> 0); + a[offset + 6] = (short)0xAB11; + a[offset + 7] = (short)0xCD36; + a[offset + 8] = (short)0xEF89; + a[offset + 9] = (short)0x0156; + a[offset + 10] = (short)(v1 >> 48); + a[offset + 11] = (short)(v1 >> 32); + a[offset + 12] = (short)(v1 >> 16); + a[offset + 13] = (short)(v1 >> 0); + a[offset + 14] = (short)(v2 >> 16); + a[offset + 15] = (short)(v2 >> 0); + } else { + a[offset + 4] = (short)(v1 >> 0); + a[offset + 5] = (short)(v1 >> 16); + a[offset + 6] = (short)0xAB11; + a[offset + 7] = (short)0xCD36; + a[offset + 8] = (short)0xEF89; + a[offset + 9] = (short)0x0156; + a[offset + 10] = (short)(v1 >> 0); + a[offset + 11] = (short)(v1 >> 16); + a[offset + 12] = (short)(v1 >> 32); + a[offset + 13] = (short)(v1 >> 48); + a[offset + 14] = (short)(v2 >> 0); + a[offset + 15] = (short)(v2 >> 16); + } a[offset + 16] = (short)0xEFEF; return new Object[]{ a }; } @@ -1041,16 +1211,28 @@ static Object[] test202R(int[] a, int offset, long v1, int v2) { a[offset + 1] = 0xFFFFFFFF; a[offset + 2] = v2; a[offset + 3] = 0x42424242; - a[offset + 4] = (int)(v1 >> 0); - a[offset + 5] = (int)(v1 >> 32); + if (IS_BIG_ENDIAN) { + a[offset + 4] = (int)(v1 >> 32); + a[offset + 5] = (int)(v1 >> 0); + } else { + 
a[offset + 4] = (int)(v1 >> 0); + a[offset + 5] = (int)(v1 >> 32); + } a[offset + 6] = 0xAB110129; a[offset + 7] = 0xCD360183; a[offset + 8] = 0xEF890173; a[offset + 9] = 0x01560124; - a[offset + 10] = (int)(v1 >> 0); - a[offset + 11] = (int)(v1 >> 32); - a[offset + 12] = (int)(v1 >> 0); - a[offset + 13] = (int)(v1 >> 32); + if (IS_BIG_ENDIAN) { + a[offset + 10] = (int)(v1 >> 32); + a[offset + 11] = (int)(v1 >> 0); + a[offset + 12] = (int)(v1 >> 32); + a[offset + 13] = (int)(v1 >> 0); + } else { + a[offset + 10] = (int)(v1 >> 0); + a[offset + 11] = (int)(v1 >> 32); + a[offset + 12] = (int)(v1 >> 0); + a[offset + 13] = (int)(v1 >> 32); + } a[offset + 14] = v2; a[offset + 15] = v2; a[offset + 16] = 0xEFEFEFEF; @@ -1068,16 +1250,28 @@ static Object[] test202a(int[] a, int offset, long v1, int v2) { a[offset + 1] = 0xFFFFFFFF; a[offset + 2] = v2; a[offset + 3] = 0x42424242; - a[offset + 4] = (int)(v1 >> 0); - a[offset + 5] = (int)(v1 >> 32); + if (IS_BIG_ENDIAN) { + a[offset + 4] = (int)(v1 >> 32); + a[offset + 5] = (int)(v1 >> 0); + } else { + a[offset + 4] = (int)(v1 >> 0); + a[offset + 5] = (int)(v1 >> 32); + } a[offset + 6] = 0xAB110129; a[offset + 7] = 0xCD360183; a[offset + 8] = 0xEF890173; a[offset + 9] = 0x01560124; - a[offset + 10] = (int)(v1 >> 0); - a[offset + 11] = (int)(v1 >> 32); - a[offset + 12] = (int)(v1 >> 0); - a[offset + 13] = (int)(v1 >> 32); + if (IS_BIG_ENDIAN) { + a[offset + 10] = (int)(v1 >> 32); + a[offset + 11] = (int)(v1 >> 0); + a[offset + 12] = (int)(v1 >> 32); + a[offset + 13] = (int)(v1 >> 0); + } else { + a[offset + 10] = (int)(v1 >> 0); + a[offset + 11] = (int)(v1 >> 32); + a[offset + 12] = (int)(v1 >> 0); + a[offset + 13] = (int)(v1 >> 32); + } a[offset + 14] = v2; a[offset + 15] = v2; a[offset + 16] = 0xEFEFEFEF; @@ -1148,26 +1342,49 @@ static Object[] test400a(int[] a) { // 502a: during warmup never violate RangeCheck -> compile once with merged stores // but then after warmup violate RangeCheck -> recompile without merged stores 
static Object[] test500R(byte[] a, int offset, long v) { - int idx = 0; - try { - a[offset + 0] = (byte)(v >> 0); - idx = 1; - a[offset + 1] = (byte)(v >> 8); - idx = 2; - a[offset + 2] = (byte)(v >> 16); - idx = 3; - a[offset + 3] = (byte)(v >> 24); - idx = 4; - a[offset + 4] = (byte)(v >> 32); - idx = 5; - a[offset + 5] = (byte)(v >> 40); - idx = 6; - a[offset + 6] = (byte)(v >> 48); - idx = 7; - a[offset + 7] = (byte)(v >> 56); - idx = 8; - } catch (ArrayIndexOutOfBoundsException _) {} - return new Object[]{ a, new int[]{ idx } }; + if (IS_BIG_ENDIAN) { + int idx = 0; + try { + a[offset + 0] = (byte)(v >> 56); + idx = 1; + a[offset + 1] = (byte)(v >> 48); + idx = 2; + a[offset + 2] = (byte)(v >> 40); + idx = 3; + a[offset + 3] = (byte)(v >> 32); + idx = 4; + a[offset + 4] = (byte)(v >> 24); + idx = 5; + a[offset + 5] = (byte)(v >> 16); + idx = 6; + a[offset + 6] = (byte)(v >> 8); + idx = 7; + a[offset + 7] = (byte)(v >> 0); + idx = 8; + } catch (ArrayIndexOutOfBoundsException _) {} + return new Object[]{ a, new int[]{ idx } }; + } else { + int idx = 0; + try { + a[offset + 0] = (byte)(v >> 0); + idx = 1; + a[offset + 1] = (byte)(v >> 8); + idx = 2; + a[offset + 2] = (byte)(v >> 16); + idx = 3; + a[offset + 3] = (byte)(v >> 24); + idx = 4; + a[offset + 4] = (byte)(v >> 32); + idx = 5; + a[offset + 5] = (byte)(v >> 40); + idx = 6; + a[offset + 6] = (byte)(v >> 48); + idx = 7; + a[offset + 7] = (byte)(v >> 56); + idx = 8; + } catch (ArrayIndexOutOfBoundsException _) {} + return new Object[]{ a, new int[]{ idx } }; + } } @Test @@ -1177,82 +1394,165 @@ static Object[] test500R(byte[] a, int offset, long v) { IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // expect merged applyIf = {"UseUnalignedAccesses", "true"}) static Object[] test500a(byte[] a, int offset, long v) { - int idx = 0; - try { - a[offset + 0] = (byte)(v >> 0); - idx = 1; - a[offset + 1] = (byte)(v >> 8); - idx = 2; - a[offset + 2] = (byte)(v >> 
16); - idx = 3; - a[offset + 3] = (byte)(v >> 24); - idx = 4; - a[offset + 4] = (byte)(v >> 32); - idx = 5; - a[offset + 5] = (byte)(v >> 40); - idx = 6; - a[offset + 6] = (byte)(v >> 48); - idx = 7; - a[offset + 7] = (byte)(v >> 56); - idx = 8; - } catch (ArrayIndexOutOfBoundsException _) {} - return new Object[]{ a, new int[]{ idx } }; + if (IS_BIG_ENDIAN) { + int idx = 0; + try { + a[offset + 0] = (byte)(v >> 56); + idx = 1; + a[offset + 1] = (byte)(v >> 48); + idx = 2; + a[offset + 2] = (byte)(v >> 40); + idx = 3; + a[offset + 3] = (byte)(v >> 32); + idx = 4; + a[offset + 4] = (byte)(v >> 24); + idx = 5; + a[offset + 5] = (byte)(v >> 16); + idx = 6; + a[offset + 6] = (byte)(v >> 8); + idx = 7; + a[offset + 7] = (byte)(v >> 0); + idx = 8; + } catch (ArrayIndexOutOfBoundsException _) {} + return new Object[]{ a, new int[]{ idx } }; + } else { + int idx = 0; + try { + a[offset + 0] = (byte)(v >> 0); + idx = 1; + a[offset + 1] = (byte)(v >> 8); + idx = 2; + a[offset + 2] = (byte)(v >> 16); + idx = 3; + a[offset + 3] = (byte)(v >> 24); + idx = 4; + a[offset + 4] = (byte)(v >> 32); + idx = 5; + a[offset + 5] = (byte)(v >> 40); + idx = 6; + a[offset + 6] = (byte)(v >> 48); + idx = 7; + a[offset + 7] = (byte)(v >> 56); + idx = 8; + } catch (ArrayIndexOutOfBoundsException _) {} + return new Object[]{ a, new int[]{ idx } }; + } } @Test @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", // No optimization because of too many RangeChecks IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", - IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}) + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, + applyIfPlatform = {"little-endian", "true"}) + @IR(counts = 
{IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "7", + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"big-endian", "true"}) static Object[] test501a(byte[] a, int offset, long v) { - int idx = 0; - try { - a[offset + 0] = (byte)(v >> 0); - idx = 1; - a[offset + 1] = (byte)(v >> 8); - idx = 2; - a[offset + 2] = (byte)(v >> 16); - idx = 3; - a[offset + 3] = (byte)(v >> 24); - idx = 4; - a[offset + 4] = (byte)(v >> 32); - idx = 5; - a[offset + 5] = (byte)(v >> 40); - idx = 6; - a[offset + 6] = (byte)(v >> 48); - idx = 7; - a[offset + 7] = (byte)(v >> 56); - idx = 8; - } catch (ArrayIndexOutOfBoundsException _) {} - return new Object[]{ a, new int[]{ idx } }; + if (IS_BIG_ENDIAN) { + int idx = 0; + try { + a[offset + 0] = (byte)(v >> 56); + idx = 1; + a[offset + 1] = (byte)(v >> 48); + idx = 2; + a[offset + 2] = (byte)(v >> 40); + idx = 3; + a[offset + 3] = (byte)(v >> 32); + idx = 4; + a[offset + 4] = (byte)(v >> 24); + idx = 5; + a[offset + 5] = (byte)(v >> 16); + idx = 6; + a[offset + 6] = (byte)(v >> 8); // 2 lowest StoreB are merged. 7th StoreB is + idx = 7; // needed if RC for a[offset + 7] fails. 
+ a[offset + 7] = (byte)(v >> 0); + idx = 8; + } catch (ArrayIndexOutOfBoundsException _) {} + return new Object[]{ a, new int[]{ idx } }; + } else { + int idx = 0; + try { + a[offset + 0] = (byte)(v >> 0); + idx = 1; + a[offset + 1] = (byte)(v >> 8); + idx = 2; + a[offset + 2] = (byte)(v >> 16); + idx = 3; + a[offset + 3] = (byte)(v >> 24); + idx = 4; + a[offset + 4] = (byte)(v >> 32); + idx = 5; + a[offset + 5] = (byte)(v >> 40); + idx = 6; + a[offset + 6] = (byte)(v >> 48); + idx = 7; + a[offset + 7] = (byte)(v >> 56); + idx = 8; + } catch (ArrayIndexOutOfBoundsException _) {} + return new Object[]{ a, new int[]{ idx } }; + } } @Test @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", // No optimization because of too many RangeChecks IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", - IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}) + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, + applyIfPlatform = {"little-endian", "true"}) + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "7", + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"big-endian", "true"}) static Object[] test502a(byte[] a, int offset, long v) { - int idx = 0; - try { - a[offset + 0] = (byte)(v >> 0); - idx = 1; - a[offset + 1] = (byte)(v >> 8); - idx = 2; - a[offset + 2] = (byte)(v >> 16); 
- idx = 3; - a[offset + 3] = (byte)(v >> 24); - idx = 4; - a[offset + 4] = (byte)(v >> 32); - idx = 5; - a[offset + 5] = (byte)(v >> 40); - idx = 6; - a[offset + 6] = (byte)(v >> 48); - idx = 7; - a[offset + 7] = (byte)(v >> 56); - idx = 8; - } catch (ArrayIndexOutOfBoundsException _) {} - return new Object[]{ a, new int[]{ idx } }; + if (IS_BIG_ENDIAN) { + int idx = 0; + try { + a[offset + 0] = (byte)(v >> 56); + idx = 1; + a[offset + 1] = (byte)(v >> 48); + idx = 2; + a[offset + 2] = (byte)(v >> 40); + idx = 3; + a[offset + 3] = (byte)(v >> 32); + idx = 4; + a[offset + 4] = (byte)(v >> 24); + idx = 5; + a[offset + 5] = (byte)(v >> 16); + idx = 6; + a[offset + 6] = (byte)(v >> 8); // 2 lowest StoreB are merged. 7th StoreB is + idx = 7; // needed if RC for a[offset + 7] fails. + a[offset + 7] = (byte)(v >> 0); + idx = 8; + } catch (ArrayIndexOutOfBoundsException _) {} + return new Object[]{ a, new int[]{ idx } }; + } else { + int idx = 0; + try { + a[offset + 0] = (byte)(v >> 0); + idx = 1; + a[offset + 1] = (byte)(v >> 8); + idx = 2; + a[offset + 2] = (byte)(v >> 16); + idx = 3; + a[offset + 3] = (byte)(v >> 24); + idx = 4; + a[offset + 4] = (byte)(v >> 32); + idx = 5; + a[offset + 5] = (byte)(v >> 40); + idx = 6; + a[offset + 6] = (byte)(v >> 48); + idx = 7; + a[offset + 7] = (byte)(v >> 56); + idx = 8; + } catch (ArrayIndexOutOfBoundsException _) {} + return new Object[]{ a, new int[]{ idx } }; + } } @DontCompile @@ -1319,4 +1619,55 @@ static Object[] test700a(int[] a, long v1) { a[1] = (int)(v1 >> -2); return new Object[]{ a }; } + + @DontCompile + static Object[] test800R(byte[] a, int offset, long v) { + if (IS_BIG_ENDIAN) { + a[offset + 0] = (byte)(v >> 40); + a[offset + 1] = (byte)(v >> 32); + a[offset + 2] = (byte)(v >> 24); + a[offset + 3] = (byte)(v >> 16); + a[offset + 4] = (byte)(v >> 8); + a[offset + 5] = (byte)(v >> 0); + } else { + a[offset + 0] = (byte)(v >> 0); + a[offset + 1] = (byte)(v >> 8); + a[offset + 2] = (byte)(v >> 16); + a[offset + 3] = 
(byte)(v >> 24); + a[offset + 4] = (byte)(v >> 32); + a[offset + 5] = (byte)(v >> 40); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "6", + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, + applyIfPlatform = {"little-endian", "true"}) + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "2", + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"big-endian", "true"}) + static Object[] test800a(byte[] a, int offset, long v) { + if (IS_BIG_ENDIAN) { + a[offset + 0] = (byte)(v >> 40); + a[offset + 1] = (byte)(v >> 32); + a[offset + 2] = (byte)(v >> 24); // The lowest stores in the Memory chain can be merged. + a[offset + 3] = (byte)(v >> 16); // This is possible because the input for the merged store + a[offset + 4] = (byte)(v >> 8); // does not require a right shift. + a[offset + 5] = (byte)(v >> 0); + } else { + a[offset + 0] = (byte)(v >> 0); + a[offset + 1] = (byte)(v >> 8); + a[offset + 2] = (byte)(v >> 16); // The merge is tried with the lowest store in the Memory chain. 
+ a[offset + 3] = (byte)(v >> 24); // It fails because the 2 highest stores are ignored aiming for a 4 byte store + a[offset + 4] = (byte)(v >> 32); // but this would then require a right shift by 16 to get the input + a[offset + 5] = (byte)(v >> 40); // for the merge store. + } + return new Object[]{ a }; + } } diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java b/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java index 47cec9f5e13b4..e1bc34e967aed 100644 --- a/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java +++ b/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java @@ -31,6 +31,7 @@ import jdk.test.whitebox.WhiteBox; import java.lang.reflect.Method; +import java.nio.ByteOrder; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -74,7 +75,10 @@ public class IREncodingPrinter { "x86", // corresponds to vm.bits "32-bit", - "64-bit" + "64-bit", + // java.nio.ByteOrder + "little-endian", + "big-endian" )); // Please verify new CPU features before adding them. If we allow non-existent features @@ -356,7 +360,9 @@ private boolean checkPlatform(String platform, String value) { arch = "x86"; } - String currentPlatform = os + " " + arch + " " + (Platform.is32bit() ? "32-bit" : "64-bit"); + String endianess = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)? "big-endian" : "little-endian"; + + String currentPlatform = os + " " + arch + " " + (Platform.is32bit() ? 
"32-bit" : "64-bit") + " " + endianess; return (trueValue && currentPlatform.contains(platform)) || (falseValue && !currentPlatform.contains(platform)); } From d55ebb61e850cbdbf2bb2e9379452e08ece885cb Mon Sep 17 00:00:00 2001 From: Richard Reingruber Date: Mon, 13 May 2024 18:02:44 +0200 Subject: [PATCH 2/9] Typo --- src/hotspot/share/opto/memnode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index 8655a544dbf23..7e3f45a0336b9 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -3306,7 +3306,7 @@ Node* MergePrimitiveArrayStores::make_merged_input_value(const Node_List& merge_ merged_input_value = first->in(MemNode::ValueIn); bool is_true = is_con_RShift(_store->in(MemNode::ValueIn), base_last, shift_last); #else // VM_LITTLE_ENDIAN - // `_store` points to the lowest useing store in the Memory chain. On big endian it stores the + // `_store` points to the lowest using store in the Memory chain. On big endian it stores the // unshifted `base`. 
`_store` and `first` need to be exchanged in the diagram above merged_input_value = _store->in(MemNode::ValueIn); bool is_true = is_con_RShift(first->in(MemNode::ValueIn), base_last, shift_last); From 63e37a1b8faea22d6caf25ebce94e52973fbb8a3 Mon Sep 17 00:00:00 2001 From: Richard Reingruber Date: Tue, 14 May 2024 09:04:16 +0200 Subject: [PATCH 3/9] Add bug id --- test/hotspot/jtreg/compiler/c2/TestMergeStores.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java index 378ad8a182f8e..1461ff6e10477 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java @@ -33,7 +33,7 @@ /* * @test - * @bug 8318446 8331054 + * @bug 8318446 8331054 8331311 * @summary Test merging of consecutive stores * @modules java.base/jdk.internal.misc * @library /test/lib / @@ -42,7 +42,7 @@ /* * @test - * @bug 8318446 8331054 + * @bug 8318446 8331054 8331311 * @summary Test merging of consecutive stores * @modules java.base/jdk.internal.misc * @library /test/lib / From 9cbe9642a05492d1fd0c1c1511505a0e2793ab29 Mon Sep 17 00:00:00 2001 From: Richard Reingruber Date: Tue, 14 May 2024 14:30:36 +0200 Subject: [PATCH 4/9] Improve comment --- .../jtreg/compiler/c2/TestMergeStores.java | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java index 1461ff6e10477..82fdb33a77338 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java @@ -1653,20 +1653,23 @@ static Object[] test800R(byte[] a, int offset, long v) { applyIf = {"UseUnalignedAccesses", "true"}, applyIfPlatform = {"big-endian", "true"}) static Object[] test800a(byte[] a, int offset, long v) { + // Merge attempts begin at the lowest store in the Memory 
chain. + // Candidates are found following the chain. The list is trimmed to a + // power of 2 length by removing higher stores. if (IS_BIG_ENDIAN) { - a[offset + 0] = (byte)(v >> 40); - a[offset + 1] = (byte)(v >> 32); - a[offset + 2] = (byte)(v >> 24); // The lowest stores in the Memory chain can be merged. - a[offset + 3] = (byte)(v >> 16); // This is possible because the input for the merged store - a[offset + 4] = (byte)(v >> 8); // does not require a right shift. + a[offset + 0] = (byte)(v >> 40); // Removed from candidate list + a[offset + 1] = (byte)(v >> 32); // Removed from candidate list + a[offset + 2] = (byte)(v >> 24); // The 4 following stores are on the candidate list + a[offset + 3] = (byte)(v >> 16); // and they are successfully merged. + a[offset + 4] = (byte)(v >> 8); a[offset + 5] = (byte)(v >> 0); } else { - a[offset + 0] = (byte)(v >> 0); - a[offset + 1] = (byte)(v >> 8); - a[offset + 2] = (byte)(v >> 16); // The merge is tried with the lowest store in the Memory chain. - a[offset + 3] = (byte)(v >> 24); // It fails because the 2 highest stores are ignored aiming for a 4 byte store - a[offset + 4] = (byte)(v >> 32); // but this would then require a right shift by 16 to get the input - a[offset + 5] = (byte)(v >> 40); // for the merge store. + a[offset + 0] = (byte)(v >> 0); // Removed from candidate list + a[offset + 1] = (byte)(v >> 8); // Removed from candidate list + a[offset + 2] = (byte)(v >> 16); // The 4 following stores are on the candidate list. + a[offset + 3] = (byte)(v >> 24); // They cannot be merged though because this would require shifting + a[offset + 4] = (byte)(v >> 32); // The input. 
+ a[offset + 5] = (byte)(v >> 40); } return new Object[]{ a }; } From dc05bb0b8921c1538cefd967b9ae739e7e02c890 Mon Sep 17 00:00:00 2001 From: Richard Reingruber Date: Wed, 15 May 2024 09:14:44 +0200 Subject: [PATCH 5/9] Improve comment --- test/hotspot/jtreg/compiler/c2/TestMergeStores.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java index 82fdb33a77338..e53be324aa361 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java @@ -1667,8 +1667,8 @@ static Object[] test800a(byte[] a, int offset, long v) { a[offset + 0] = (byte)(v >> 0); // Removed from candidate list a[offset + 1] = (byte)(v >> 8); // Removed from candidate list a[offset + 2] = (byte)(v >> 16); // The 4 following stores are on the candidate list. - a[offset + 3] = (byte)(v >> 24); // They cannot be merged though because this would require shifting - a[offset + 4] = (byte)(v >> 32); // The input. + a[offset + 3] = (byte)(v >> 24); // The current logic does not merge them + a[offset + 4] = (byte)(v >> 32); // since it would require shifting the input. 
a[offset + 5] = (byte)(v >> 40); } return new Object[]{ a }; From 6ba191527475c3b426c0060122ca85212b878758 Mon Sep 17 00:00:00 2001 From: Richard Reingruber Date: Wed, 15 May 2024 10:14:59 +0200 Subject: [PATCH 6/9] Improve make_merged_input_value based on Emanuel's feedback --- src/hotspot/share/opto/memnode.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index 7e3f45a0336b9..f426a4200df1c 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -3300,23 +3300,24 @@ Node* MergePrimitiveArrayStores::make_merged_input_value(const Node_List& merge_ // | | // _store first // - Node const* base_last; - jint shift_last; #ifdef VM_LITTLE_ENDIAN - merged_input_value = first->in(MemNode::ValueIn); - bool is_true = is_con_RShift(_store->in(MemNode::ValueIn), base_last, shift_last); + Node* hi = _store->in(MemNode::ValueIn); + Node* lo = first->in(MemNode::ValueIn); #else // VM_LITTLE_ENDIAN - // `_store` points to the lowest using store in the Memory chain. On big endian it stores the - // unshifted `base`. 
`_store` and `first` need to be exchanged in the diagram above - merged_input_value = _store->in(MemNode::ValueIn); - bool is_true = is_con_RShift(first->in(MemNode::ValueIn), base_last, shift_last); + // `_store` and `first` are swapped in the diagram above + Node* hi = first->in(MemNode::ValueIn); + Node* lo = _store->in(MemNode::ValueIn); #endif // VM_LITTLE_ENDIAN + Node const* hi_base; + jint hi_shift; + merged_input_value = lo; + bool is_true = is_con_RShift(hi, hi_base, hi_shift); assert(is_true, "must detect con RShift"); - if (merged_input_value != base_last && merged_input_value->Opcode() == Op_ConvL2I) { + if (merged_input_value != hi_base && merged_input_value->Opcode() == Op_ConvL2I) { // look through merged_input_value = merged_input_value->in(1); } - if (merged_input_value != base_last) { + if (merged_input_value != hi_base) { // merged_input_value is not the base return nullptr; } From 8844c837ff139acb5ba6c0e58c620d341ee6fe3d Mon Sep 17 00:00:00 2001 From: Richard Reingruber Date: Wed, 15 May 2024 14:41:07 +0200 Subject: [PATCH 7/9] test2BE: big endian version of test2 --- .../jtreg/compiler/c2/TestMergeStores.java | 191 ++++++++++++------ 1 file changed, 127 insertions(+), 64 deletions(-) diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java index e53be324aa361..6793edeb6761a 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java @@ -116,6 +116,14 @@ public TestMergeStores() { testGroups.get("test2").put("test2d", (_,_) -> { return test2d(aB.clone(), offset1, vL1); }); testGroups.get("test2").put("test2e", (_,_) -> { return test2e(aB.clone(), offset1, vL1); }); + testGroups.put("test2BE", new HashMap()); + testGroups.get("test2BE").put("test2RBE", (_,_) -> { return test2RBE(aB.clone(), offset1, vL1); }); + testGroups.get("test2BE").put("test2aBE", (_,_) -> { return test2aBE(aB.clone(), offset1, vL1); }); + 
testGroups.get("test2BE").put("test2bBE", (_,_) -> { return test2bBE(aB.clone(), offset1, vL1); }); + testGroups.get("test2BE").put("test2cBE", (_,_) -> { return test2cBE(aB.clone(), offset1, vL1); }); + testGroups.get("test2BE").put("test2dBE", (_,_) -> { return test2dBE(aB.clone(), offset1, vL1); }); + testGroups.get("test2BE").put("test2eBE", (_,_) -> { return test2eBE(aB.clone(), offset1, vL1); }); + testGroups.put("test3", new HashMap()); testGroups.get("test3").put("test3R", (_,_) -> { return test3R(aB.clone(), offset1, vL1); }); testGroups.get("test3").put("test3a", (_,_) -> { return test3a(aB.clone(), offset1, vL1); }); @@ -214,6 +222,11 @@ public TestMergeStores() { "test2c", "test2d", "test2e", + "test2aBE", + "test2bBE", + "test2cBE", + "test2dBE", + "test2eBE", "test3a", "test4a", "test5a", @@ -616,51 +629,35 @@ static Object[] test1i(byte[] a) { @DontCompile static Object[] test2R(byte[] a, int offset, long v) { - if (IS_BIG_ENDIAN) { - a[offset + 0] = (byte)(v >> 56); - a[offset + 1] = (byte)(v >> 48); - a[offset + 2] = (byte)(v >> 40); - a[offset + 3] = (byte)(v >> 32); - a[offset + 4] = (byte)(v >> 24); - a[offset + 5] = (byte)(v >> 16); - a[offset + 6] = (byte)(v >> 8); - a[offset + 7] = (byte)(v >> 0); - } else { - a[offset + 0] = (byte)(v >> 0); - a[offset + 1] = (byte)(v >> 8); - a[offset + 2] = (byte)(v >> 16); - a[offset + 3] = (byte)(v >> 24); - a[offset + 4] = (byte)(v >> 32); - a[offset + 5] = (byte)(v >> 40); - a[offset + 6] = (byte)(v >> 48); - a[offset + 7] = (byte)(v >> 56); - } + a[offset + 0] = (byte)(v >> 0); + a[offset + 1] = (byte)(v >> 8); + a[offset + 2] = (byte)(v >> 16); + a[offset + 3] = (byte)(v >> 24); + a[offset + 4] = (byte)(v >> 32); + a[offset + 5] = (byte)(v >> 40); + a[offset + 6] = (byte)(v >> 48); + a[offset + 7] = (byte)(v >> 56); return new Object[]{ a }; } @Test @IR(counts = {IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, - applyIf = {"UseUnalignedAccesses", 
"true"}) + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"little-endian", "true"}) + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, + applyIfPlatform = {"big-endian", "true"}) static Object[] test2a(byte[] a, int offset, long v) { - if (IS_BIG_ENDIAN) { - a[offset + 0] = (byte)(v >> 56); - a[offset + 1] = (byte)(v >> 48); - a[offset + 2] = (byte)(v >> 40); - a[offset + 3] = (byte)(v >> 32); - a[offset + 4] = (byte)(v >> 24); - a[offset + 5] = (byte)(v >> 16); - a[offset + 6] = (byte)(v >> 8); - a[offset + 7] = (byte)(v >> 0); - } else { - a[offset + 0] = (byte)(v >> 0); - a[offset + 1] = (byte)(v >> 8); - a[offset + 2] = (byte)(v >> 16); - a[offset + 3] = (byte)(v >> 24); - a[offset + 4] = (byte)(v >> 32); - a[offset + 5] = (byte)(v >> 40); - a[offset + 6] = (byte)(v >> 48); - a[offset + 7] = (byte)(v >> 56); - } + a[offset + 0] = (byte)(v >> 0); + a[offset + 1] = (byte)(v >> 8); + a[offset + 2] = (byte)(v >> 16); + a[offset + 3] = (byte)(v >> 24); + a[offset + 4] = (byte)(v >> 32); + a[offset + 5] = (byte)(v >> 40); + a[offset + 6] = (byte)(v >> 48); + a[offset + 7] = (byte)(v >> 56); return new Object[]{ a }; } @@ -670,49 +667,115 @@ static Object[] test2a(byte[] a, int offset, long v) { static Object[] test2b(byte[] a, int offset, long v) { // Add custom null check, to ensure the unsafe access always recognizes its type as an array store if (a == null) {return null;} - UNSAFE.putLongUnaligned(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset, v); + UNSAFE.putLongUnaligned(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset, v, false /* bigEndian */); return new Object[]{ a }; } @Test 
@IR(counts = {IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, - applyIf = {"UseUnalignedAccesses", "true"}) + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"little-endian", "true"}) + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, + applyIfPlatform = {"big-endian", "true"}) static Object[] test2c(byte[] a, int offset, long v) { - if (IS_BIG_ENDIAN) { - storeLongBE(a, offset, v); - } else { - storeLongLE(a, offset, v); - } + storeLongLE(a, offset, v); return new Object[]{ a }; } @Test // No optimization, casting long -> int -> byte does not work static Object[] test2d(byte[] a, int offset, long v) { - if (IS_BIG_ENDIAN) { - storeIntBE(a, offset + 0, (int)(v >> 32)); - storeIntBE(a, offset + 4, (int)(v >> 0)); - } else { - storeIntLE(a, offset + 0, (int)(v >> 0)); - storeIntLE(a, offset + 4, (int)(v >> 32)); - } + storeIntLE(a, offset + 0, (int)(v >> 0)); + storeIntLE(a, offset + 4, (int)(v >> 32)); return new Object[]{ a }; } @Test // No optimization, casting long -> short -> byte does not work static Object[] test2e(byte[] a, int offset, long v) { - if (IS_BIG_ENDIAN) { - storeShortBE(a, offset + 0, (short)(v >> 48)); - storeShortBE(a, offset + 2, (short)(v >> 32)); - storeShortBE(a, offset + 4, (short)(v >> 16)); - storeShortBE(a, offset + 6, (short)(v >> 0)); - } else { - storeShortLE(a, offset + 0, (short)(v >> 0)); - storeShortLE(a, offset + 2, (short)(v >> 16)); - storeShortLE(a, offset + 4, (short)(v >> 32)); - storeShortLE(a, offset + 6, (short)(v >> 48)); - } + storeShortLE(a, offset + 0, (short)(v >> 0)); + 
storeShortLE(a, offset + 2, (short)(v >> 16)); + storeShortLE(a, offset + 4, (short)(v >> 32)); + storeShortLE(a, offset + 6, (short)(v >> 48)); + return new Object[]{ a }; + } + + @DontCompile + static Object[] test2RBE(byte[] a, int offset, long v) { + a[offset + 0] = (byte)(v >> 56); + a[offset + 1] = (byte)(v >> 48); + a[offset + 2] = (byte)(v >> 40); + a[offset + 3] = (byte)(v >> 32); + a[offset + 4] = (byte)(v >> 24); + a[offset + 5] = (byte)(v >> 16); + a[offset + 6] = (byte)(v >> 8); + a[offset + 7] = (byte)(v >> 0); + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, + applyIfPlatform = {"little-endian", "true"}) + @IR(counts = {IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"big-endian", "true"}) + static Object[] test2aBE(byte[] a, int offset, long v) { + a[offset + 0] = (byte)(v >> 56); + a[offset + 1] = (byte)(v >> 48); + a[offset + 2] = (byte)(v >> 40); + a[offset + 3] = (byte)(v >> 32); + a[offset + 4] = (byte)(v >> 24); + a[offset + 5] = (byte)(v >> 16); + a[offset + 6] = (byte)(v >> 8); + a[offset + 7] = (byte)(v >> 0); + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, + applyIf = {"UseUnalignedAccesses", "true"}) + static Object[] test2bBE(byte[] a, int offset, long v) { + // Add custom null check, to ensure the unsafe access always recognizes its type as an array store + if (a == null) {return null;} + 
UNSAFE.putLongUnaligned(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset, v, true /* bigEndian */); + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, + applyIfPlatform = {"little-endian", "true"}) + @IR(counts = {IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"big-endian", "true"}) + static Object[] test2cBE(byte[] a, int offset, long v) { + storeLongBE(a, offset, v); + return new Object[]{ a }; + } + + @Test + // No optimization, casting long -> int -> byte does not work + static Object[] test2dBE(byte[] a, int offset, long v) { + storeIntBE(a, offset + 0, (int)(v >> 32)); + storeIntBE(a, offset + 4, (int)(v >> 0)); + return new Object[]{ a }; + } + + @Test + // No optimization, casting long -> short -> byte does not work + static Object[] test2eBE(byte[] a, int offset, long v) { + storeShortBE(a, offset + 0, (short)(v >> 48)); + storeShortBE(a, offset + 2, (short)(v >> 32)); + storeShortBE(a, offset + 4, (short)(v >> 16)); + storeShortBE(a, offset + 6, (short)(v >> 0)); return new Object[]{ a }; } From 3169a3104b7323c4ff6f2714449a7c28025d0bba Mon Sep 17 00:00:00 2001 From: Richard Reingruber Date: Wed, 15 May 2024 17:18:49 +0200 Subject: [PATCH 8/9] Eliminate IS_BIG_ENDIAN and always execute both variants --- .../jtreg/compiler/c2/TestMergeStores.java | 1035 ++++++++++------- 1 file changed, 631 insertions(+), 404 deletions(-) diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java 
b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java index 6793edeb6761a..0e86045618b9a 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java @@ -54,8 +54,6 @@ public class TestMergeStores { private static final Unsafe UNSAFE = Unsafe.getUnsafe(); private static final Random RANDOM = Utils.getRandomInstance(); - private static final boolean IS_BIG_ENDIAN = UNSAFE.isBigEndian(); - // Inputs byte[] aB = new byte[RANGE]; byte[] bB = new byte[RANGE]; @@ -128,10 +126,18 @@ public TestMergeStores() { testGroups.get("test3").put("test3R", (_,_) -> { return test3R(aB.clone(), offset1, vL1); }); testGroups.get("test3").put("test3a", (_,_) -> { return test3a(aB.clone(), offset1, vL1); }); + testGroups.put("test3BE", new HashMap()); + testGroups.get("test3BE").put("test3RBE", (_,_) -> { return test3RBE(aB.clone(), offset1, vL1); }); + testGroups.get("test3BE").put("test3aBE", (_,_) -> { return test3aBE(aB.clone(), offset1, vL1); }); + testGroups.put("test4", new HashMap()); testGroups.get("test4").put("test4R", (_,_) -> { return test4R(aB.clone(), offset1, vL1, vI1, vS1, vB1); }); testGroups.get("test4").put("test4a", (_,_) -> { return test4a(aB.clone(), offset1, vL1, vI1, vS1, vB1); }); + testGroups.put("test4BE", new HashMap()); + testGroups.get("test4BE").put("test4RBE", (_,_) -> { return test4RBE(aB.clone(), offset1, vL1, vI1, vS1, vB1); }); + testGroups.get("test4BE").put("test4aBE", (_,_) -> { return test4aBE(aB.clone(), offset1, vL1, vI1, vS1, vB1); }); + testGroups.put("test5", new HashMap()); testGroups.get("test5").put("test5R", (_,_) -> { return test5R(aB.clone(), offset1); }); testGroups.get("test5").put("test5a", (_,_) -> { return test5a(aB.clone(), offset1); }); @@ -144,6 +150,10 @@ public TestMergeStores() { testGroups.get("test7").put("test7R", (_,_) -> { return test7R(aB.clone(), offset1, vI1); }); testGroups.get("test7").put("test7a", (_,_) -> { return test7a(aB.clone(), offset1, vI1); }); 
+ testGroups.put("test7BE", new HashMap()); + testGroups.get("test7BE").put("test7RBE", (_,_) -> { return test7RBE(aB.clone(), offset1, vI1); }); + testGroups.get("test7BE").put("test7aBE", (_,_) -> { return test7aBE(aB.clone(), offset1, vI1); }); + testGroups.put("test100", new HashMap()); testGroups.get("test100").put("test100R", (_,_) -> { return test100R(aS.clone(), offset1); }); testGroups.get("test100").put("test100a", (_,_) -> { return test100a(aS.clone(), offset1); }); @@ -156,6 +166,10 @@ public TestMergeStores() { testGroups.get("test102").put("test102R", (_,_) -> { return test102R(aS.clone(), offset1, vL1, vI1, vS1); }); testGroups.get("test102").put("test102a", (_,_) -> { return test102a(aS.clone(), offset1, vL1, vI1, vS1); }); + testGroups.put("test102BE", new HashMap()); + testGroups.get("test102BE").put("test102RBE", (_,_) -> { return test102RBE(aS.clone(), offset1, vL1, vI1, vS1); }); + testGroups.get("test102BE").put("test102aBE", (_,_) -> { return test102aBE(aS.clone(), offset1, vL1, vI1, vS1); }); + testGroups.put("test200", new HashMap()); testGroups.get("test200").put("test200R", (_,_) -> { return test200R(aI.clone(), offset1); }); testGroups.get("test200").put("test200a", (_,_) -> { return test200a(aI.clone(), offset1); }); @@ -168,6 +182,10 @@ public TestMergeStores() { testGroups.get("test202").put("test202R", (_,_) -> { return test202R(aI.clone(), offset1, vL1, vI1); }); testGroups.get("test202").put("test202a", (_,_) -> { return test202a(aI.clone(), offset1, vL1, vI1); }); + testGroups.put("test202BE", new HashMap()); + testGroups.get("test202BE").put("test202RBE", (_,_) -> { return test202RBE(aI.clone(), offset1, vL1, vI1); }); + testGroups.get("test202BE").put("test202aBE", (_,_) -> { return test202aBE(aI.clone(), offset1, vL1, vI1); }); + testGroups.put("test300", new HashMap()); testGroups.get("test300").put("test300R", (_,_) -> { return test300R(aI.clone()); }); testGroups.get("test300").put("test300a", (_,_) -> { return 
test300a(aI.clone()); }); @@ -194,6 +212,24 @@ public TestMergeStores() { // First use something in range, and after warmup randomize going outside the range. // Consequence: all RangeChecks stay in the final compilation. + testGroups.put("test500BE", new HashMap()); + testGroups.get("test500BE").put("test500RBE", (_,_) -> { return test500RBE(aB.clone(), offset1, vL1); }); + testGroups.get("test500BE").put("test500aBE", (_,_) -> { return test500aBE(aB.clone(), offset1, vL1); }); + + testGroups.put("test501BE", new HashMap()); + testGroups.get("test501BE").put("test500RBE", (_,i) -> { return test500RBE(aB.clone(), RANGE - 20 + (i % 30), vL1); }); + testGroups.get("test501BE").put("test501aBE", (_,i) -> { return test501aBE(aB.clone(), RANGE - 20 + (i % 30), vL1); }); + // +-------------------+ + // Create offsets that are sometimes going to pass all RangeChecks, and sometimes one, and sometimes none. + // Consequence: all RangeChecks stay in the final compilation. + + testGroups.put("test502BE", new HashMap()); + testGroups.get("test502BE").put("test500RBE", (w,i) -> { return test500RBE(aB.clone(), w ? offset1 : RANGE - 20 + (i % 30), vL1); }); + testGroups.get("test502BE").put("test502aBE", (w,i) -> { return test502aBE(aB.clone(), w ? offset1 : RANGE - 20 + (i % 30), vL1); }); + // +-----+ +-------------------+ + // First use something in range, and after warmup randomize going outside the range. + // Consequence: all RangeChecks stay in the final compilation. 
+ testGroups.put("test600", new HashMap()); testGroups.get("test600").put("test600R", (_,i) -> { return test600R(aB.clone(), aI.clone(), i); }); testGroups.get("test600").put("test600a", (_,i) -> { return test600a(aB.clone(), aI.clone(), i); }); @@ -205,6 +241,10 @@ public TestMergeStores() { testGroups.put("test800", new HashMap()); testGroups.get("test800").put("test800R", (_,_) -> { return test800R(aB.clone(), offset1, vL1); }); testGroups.get("test800").put("test800a", (_,_) -> { return test800a(aB.clone(), offset1, vL1); }); + + testGroups.put("test800BE", new HashMap()); + testGroups.get("test800BE").put("test800RBE", (_,_) -> { return test800RBE(aB.clone(), offset1, vL1); }); + testGroups.get("test800BE").put("test800aBE", (_,_) -> { return test800aBE(aB.clone(), offset1, vL1); }); } @Warmup(100) @@ -228,24 +268,33 @@ public TestMergeStores() { "test2dBE", "test2eBE", "test3a", + "test3aBE", "test4a", + "test4aBE", "test5a", "test6a", "test7a", + "test7aBE", "test100a", "test101a", "test102a", + "test102aBE", "test200a", "test201a", "test202a", + "test202aBE", "test300a", "test400a", "test500a", "test501a", "test502a", + "test500aBE", + "test501aBE", + "test502aBE", "test600a", "test700a", - "test800a"}) + "test800a", + "test800aBE"}) public void runTests(RunInfo info) { // Repeat many times, so that we also have multiple iterations for post-warmup to potentially recompile int iters = info.isWarmUp() ? 
1_000 : 50_000; @@ -781,51 +830,69 @@ static Object[] test2eBE(byte[] a, int offset, long v) { @DontCompile static Object[] test3R(byte[] a, int offset, long v) { - if (IS_BIG_ENDIAN) { - a[offset + 0] = (byte)(v >> 24); - a[offset + 1] = (byte)(v >> 16); - a[offset + 2] = (byte)(v >> 8); - a[offset + 3] = (byte)(v >> 0); - a[offset + 4] = (byte)(v >> 24); - a[offset + 5] = (byte)(v >> 16); - a[offset + 6] = (byte)(v >> 8); - a[offset + 7] = (byte)(v >> 0); - } else { - a[offset + 0] = (byte)(v >> 0); - a[offset + 1] = (byte)(v >> 8); - a[offset + 2] = (byte)(v >> 16); - a[offset + 3] = (byte)(v >> 24); - a[offset + 4] = (byte)(v >> 0); - a[offset + 5] = (byte)(v >> 8); - a[offset + 6] = (byte)(v >> 16); - a[offset + 7] = (byte)(v >> 24); - } + a[offset + 0] = (byte)(v >> 0); + a[offset + 1] = (byte)(v >> 8); + a[offset + 2] = (byte)(v >> 16); + a[offset + 3] = (byte)(v >> 24); + a[offset + 4] = (byte)(v >> 0); + a[offset + 5] = (byte)(v >> 8); + a[offset + 6] = (byte)(v >> 16); + a[offset + 7] = (byte)(v >> 24); return new Object[]{ a }; } @Test @IR(counts = {IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "2"}, - applyIf = {"UseUnalignedAccesses", "true"}) + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"little-endian", "true"}) + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, + applyIfPlatform = {"big-endian", "true"}) static Object[] test3a(byte[] a, int offset, long v) { - if (IS_BIG_ENDIAN) { - a[offset + 0] = (byte)(v >> 24); - a[offset + 1] = (byte)(v >> 16); - a[offset + 2] = (byte)(v >> 8); - a[offset + 3] = 
(byte)(v >> 0); - a[offset + 4] = (byte)(v >> 24); - a[offset + 5] = (byte)(v >> 16); - a[offset + 6] = (byte)(v >> 8); - a[offset + 7] = (byte)(v >> 0); - } else { - a[offset + 0] = (byte)(v >> 0); - a[offset + 1] = (byte)(v >> 8); - a[offset + 2] = (byte)(v >> 16); - a[offset + 3] = (byte)(v >> 24); - a[offset + 4] = (byte)(v >> 0); - a[offset + 5] = (byte)(v >> 8); - a[offset + 6] = (byte)(v >> 16); - a[offset + 7] = (byte)(v >> 24); - } + a[offset + 0] = (byte)(v >> 0); + a[offset + 1] = (byte)(v >> 8); + a[offset + 2] = (byte)(v >> 16); + a[offset + 3] = (byte)(v >> 24); + a[offset + 4] = (byte)(v >> 0); + a[offset + 5] = (byte)(v >> 8); + a[offset + 6] = (byte)(v >> 16); + a[offset + 7] = (byte)(v >> 24); + return new Object[]{ a }; + } + + @DontCompile + static Object[] test3RBE(byte[] a, int offset, long v) { + a[offset + 0] = (byte)(v >> 24); + a[offset + 1] = (byte)(v >> 16); + a[offset + 2] = (byte)(v >> 8); + a[offset + 3] = (byte)(v >> 0); + a[offset + 4] = (byte)(v >> 24); + a[offset + 5] = (byte)(v >> 16); + a[offset + 6] = (byte)(v >> 8); + a[offset + 7] = (byte)(v >> 0); + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, + applyIfPlatform = {"little-endian", "true"}) + @IR(counts = {IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "2"}, + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"big-endian", "true"}) + static Object[] test3aBE(byte[] a, int offset, long v) { + a[offset + 0] = (byte)(v >> 24); + a[offset + 1] = (byte)(v >> 16); + a[offset + 2] = (byte)(v >> 8); + 
a[offset + 3] = (byte)(v >> 0); + a[offset + 4] = (byte)(v >> 24); + a[offset + 5] = (byte)(v >> 16); + a[offset + 6] = (byte)(v >> 8); + a[offset + 7] = (byte)(v >> 0); return new Object[]{ a }; } @@ -835,32 +902,18 @@ static Object[] test4R(byte[] a, int offset, long v1, int v2, short v3, byte v4) a[offset + 1] = (byte)0xFF; a[offset + 2] = v4; a[offset + 3] = (byte)0x42; - if (IS_BIG_ENDIAN) { - a[offset + 4] = (byte)(v1 >> 8); - a[offset + 5] = (byte)(v1 >> 0); - } else { - a[offset + 4] = (byte)(v1 >> 0); - a[offset + 5] = (byte)(v1 >> 8); - } + a[offset + 4] = (byte)(v1 >> 0); + a[offset + 5] = (byte)(v1 >> 8); a[offset + 6] = (byte)0xAB; a[offset + 7] = (byte)0xCD; a[offset + 8] = (byte)0xEF; a[offset + 9] = (byte)0x01; - if (IS_BIG_ENDIAN) { - a[offset + 10] = (byte)(v2 >> 24); - a[offset + 11] = (byte)(v2 >> 16); - a[offset + 12] = (byte)(v2 >> 8); - a[offset + 13] = (byte)(v2 >> 0); - a[offset + 14] = (byte)(v3 >> 8); - a[offset + 15] = (byte)(v3 >> 0); - } else { - a[offset + 10] = (byte)(v2 >> 0); - a[offset + 11] = (byte)(v2 >> 8); - a[offset + 12] = (byte)(v2 >> 16); - a[offset + 13] = (byte)(v2 >> 24); - a[offset + 14] = (byte)(v3 >> 0); - a[offset + 15] = (byte)(v3 >> 8); - } + a[offset + 10] = (byte)(v2 >> 0); + a[offset + 11] = (byte)(v2 >> 8); + a[offset + 12] = (byte)(v2 >> 16); + a[offset + 13] = (byte)(v2 >> 24); + a[offset + 14] = (byte)(v3 >> 0); + a[offset + 15] = (byte)(v3 >> 8); a[offset + 16] = (byte)0xEF; return new Object[]{ a }; } @@ -870,38 +923,87 @@ static Object[] test4R(byte[] a, int offset, long v1, int v2, short v3, byte v4) IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "3", IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "2", IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, - applyIf = {"UseUnalignedAccesses", "true"}) + applyIf = {"UseUnalignedAccesses", "true"}, + 
applyIfPlatform = {"little-endian", "true"}) + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "12", + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", // Stores of constants can be merged + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"big-endian", "true"}) static Object[] test4a(byte[] a, int offset, long v1, int v2, short v3, byte v4) { a[offset + 0] = (byte)0x00; // individual load expected to go into state of RC a[offset + 1] = (byte)0xFF; a[offset + 2] = v4; a[offset + 3] = (byte)0x42; - if (IS_BIG_ENDIAN) { - a[offset + 4] = (byte)(v1 >> 8); - a[offset + 5] = (byte)(v1 >> 0); - } else { - a[offset + 4] = (byte)(v1 >> 0); - a[offset + 5] = (byte)(v1 >> 8); - } + a[offset + 4] = (byte)(v1 >> 0); + a[offset + 5] = (byte)(v1 >> 8); a[offset + 6] = (byte)0xAB; a[offset + 7] = (byte)0xCD; a[offset + 8] = (byte)0xEF; a[offset + 9] = (byte)0x01; - if (IS_BIG_ENDIAN) { - a[offset + 10] = (byte)(v2 >> 24); - a[offset + 11] = (byte)(v2 >> 16); - a[offset + 12] = (byte)(v2 >> 8); - a[offset + 13] = (byte)(v2 >> 0); - a[offset + 14] = (byte)(v3 >> 8); - a[offset + 15] = (byte)(v3 >> 0); - } else { - a[offset + 10] = (byte)(v2 >> 0); - a[offset + 11] = (byte)(v2 >> 8); - a[offset + 12] = (byte)(v2 >> 16); - a[offset + 13] = (byte)(v2 >> 24); - a[offset + 14] = (byte)(v3 >> 0); - a[offset + 15] = (byte)(v3 >> 8); - } + a[offset + 10] = (byte)(v2 >> 0); + a[offset + 11] = (byte)(v2 >> 8); + a[offset + 12] = (byte)(v2 >> 16); + a[offset + 13] = (byte)(v2 >> 24); + a[offset + 14] = (byte)(v3 >> 0); + a[offset + 15] = (byte)(v3 >> 8); + a[offset + 16] = (byte)0xEF; + return new Object[]{ a }; + } + + @DontCompile + static Object[] 
test4RBE(byte[] a, int offset, long v1, int v2, short v3, byte v4) { + a[offset + 0] = (byte)0x00; + a[offset + 1] = (byte)0xFF; + a[offset + 2] = v4; + a[offset + 3] = (byte)0x42; + a[offset + 4] = (byte)(v1 >> 8); + a[offset + 5] = (byte)(v1 >> 0); + a[offset + 6] = (byte)0xAB; + a[offset + 7] = (byte)0xCD; + a[offset + 8] = (byte)0xEF; + a[offset + 9] = (byte)0x01; + a[offset + 10] = (byte)(v2 >> 24); + a[offset + 11] = (byte)(v2 >> 16); + a[offset + 12] = (byte)(v2 >> 8); + a[offset + 13] = (byte)(v2 >> 0); + a[offset + 14] = (byte)(v3 >> 8); + a[offset + 15] = (byte)(v3 >> 0); + a[offset + 16] = (byte)0xEF; + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "12", + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", // Stores of constants can be merged + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"little-endian", "true"}) + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "4", // 3 (+ 1 for uncommon trap) + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "3", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "2", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"big-endian", "true"}) + static Object[] test4aBE(byte[] a, int offset, long v1, int v2, short v3, byte v4) { + a[offset + 0] = (byte)0x00; // individual load expected to go into state of RC + a[offset + 1] = (byte)0xFF; + a[offset + 2] = v4; + 
a[offset + 3] = (byte)0x42; + a[offset + 4] = (byte)(v1 >> 8); + a[offset + 5] = (byte)(v1 >> 0); + a[offset + 6] = (byte)0xAB; + a[offset + 7] = (byte)0xCD; + a[offset + 8] = (byte)0xEF; + a[offset + 9] = (byte)0x01; + a[offset + 10] = (byte)(v2 >> 24); + a[offset + 11] = (byte)(v2 >> 16); + a[offset + 12] = (byte)(v2 >> 8); + a[offset + 13] = (byte)(v2 >> 0); + a[offset + 14] = (byte)(v3 >> 8); + a[offset + 15] = (byte)(v3 >> 0); a[offset + 16] = (byte)0xEF; return new Object[]{ a }; } @@ -983,15 +1085,9 @@ static Object[] test6a(byte[] a, byte[] b, int offset1, int offset2) { @DontCompile static Object[] test7R(byte[] a, int offset1, int v1) { - if (IS_BIG_ENDIAN) { - a[offset1 + 1] = (byte)(v1 >> 24); - a[offset1 + 2] = (byte)(v1 >> 16); - a[offset1 + 3] = (byte)(v1 >> 8); - } else { - a[offset1 + 1] = (byte)(v1 >> 8); - a[offset1 + 2] = (byte)(v1 >> 16); - a[offset1 + 3] = (byte)(v1 >> 24); - } + a[offset1 + 1] = (byte)(v1 >> 8); + a[offset1 + 2] = (byte)(v1 >> 16); + a[offset1 + 3] = (byte)(v1 >> 24); return new Object[]{ a }; } @@ -1001,15 +1097,29 @@ static Object[] test7R(byte[] a, int offset1, int v1) { IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}) static Object[] test7a(byte[] a, int offset1, int v1) { - if (IS_BIG_ENDIAN) { - a[offset1 + 1] = (byte)(v1 >> 24); - a[offset1 + 2] = (byte)(v1 >> 16); - a[offset1 + 3] = (byte)(v1 >> 8); - } else { - a[offset1 + 1] = (byte)(v1 >> 8); - a[offset1 + 2] = (byte)(v1 >> 16); - a[offset1 + 3] = (byte)(v1 >> 24); - } + a[offset1 + 1] = (byte)(v1 >> 8); + a[offset1 + 2] = (byte)(v1 >> 16); + a[offset1 + 3] = (byte)(v1 >> 24); + return new Object[]{ a }; + } + + @DontCompile + static Object[] test7RBE(byte[] a, int offset1, int v1) { + a[offset1 + 1] = (byte)(v1 >> 24); + a[offset1 + 2] = (byte)(v1 >> 16); + a[offset1 + 3] = (byte)(v1 >> 8); + return new 
Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "3", + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}) + static Object[] test7aBE(byte[] a, int offset1, int v1) { + a[offset1 + 1] = (byte)(v1 >> 24); + a[offset1 + 2] = (byte)(v1 >> 16); + a[offset1 + 3] = (byte)(v1 >> 8); return new Object[]{ a }; } @@ -1107,33 +1217,18 @@ static Object[] test102R(short[] a, int offset, long v1, int v2, short v3) { a[offset + 1] = (short)0xFFFF; a[offset + 2] = v3; a[offset + 3] = (short)0x4242; - if (IS_BIG_ENDIAN) { - a[offset + 4] = (short)(v1 >> 16); - a[offset + 5] = (short)(v1 >> 0); - a[offset + 6] = (short)0xAB11; - a[offset + 7] = (short)0xCD36; - a[offset + 8] = (short)0xEF89; - a[offset + 9] = (short)0x0156; - a[offset + 10] = (short)(v1 >> 48); - a[offset + 11] = (short)(v1 >> 32); - a[offset + 12] = (short)(v1 >> 16); - a[offset + 13] = (short)(v1 >> 0); - a[offset + 14] = (short)(v2 >> 16); - a[offset + 15] = (short)(v2 >> 0); - } else { - a[offset + 4] = (short)(v1 >> 0); - a[offset + 5] = (short)(v1 >> 16); - a[offset + 6] = (short)0xAB11; - a[offset + 7] = (short)0xCD36; - a[offset + 8] = (short)0xEF89; - a[offset + 9] = (short)0x0156; - a[offset + 10] = (short)(v1 >> 0); - a[offset + 11] = (short)(v1 >> 16); - a[offset + 12] = (short)(v1 >> 32); - a[offset + 13] = (short)(v1 >> 48); - a[offset + 14] = (short)(v2 >> 0); - a[offset + 15] = (short)(v2 >> 16); - } + a[offset + 4] = (short)(v1 >> 0); + a[offset + 5] = (short)(v1 >> 16); + a[offset + 6] = (short)0xAB11; + a[offset + 7] = (short)0xCD36; + a[offset + 8] = (short)0xEF89; + a[offset + 9] = (short)0x0156; + a[offset + 10] = (short)(v1 >> 0); + a[offset + 11] 
= (short)(v1 >> 16); + a[offset + 12] = (short)(v1 >> 32); + a[offset + 13] = (short)(v1 >> 48); + a[offset + 14] = (short)(v2 >> 0); + a[offset + 15] = (short)(v2 >> 16); a[offset + 16] = (short)0xEFEF; return new Object[]{ a }; } @@ -1143,39 +1238,87 @@ static Object[] test102R(short[] a, int offset, long v1, int v2, short v3) { IRNode.STORE_C_OF_CLASS, "short\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "4", // 3 (+1 that goes into RC) IRNode.STORE_I_OF_CLASS, "short\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "3", IRNode.STORE_L_OF_CLASS, "short\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "2"}, - applyIf = {"UseUnalignedAccesses", "true"}) + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"little-endian", "true"}) + @IR(counts = {IRNode.STORE_B_OF_CLASS, "short\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_C_OF_CLASS, "short\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "12", + IRNode.STORE_I_OF_CLASS, "short\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", // Stores of constants can be merged + IRNode.STORE_L_OF_CLASS, "short\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"big-endian", "true"}) static Object[] test102a(short[] a, int offset, long v1, int v2, short v3) { a[offset + 0] = (short)0x0000; // store goes into RC a[offset + 1] = (short)0xFFFF; a[offset + 2] = v3; a[offset + 3] = (short)0x4242; - if (IS_BIG_ENDIAN) { - a[offset + 4] = (short)(v1 >> 16); - a[offset + 5] = (short)(v1 >> 0); - a[offset + 6] = (short)0xAB11; - a[offset + 7] = (short)0xCD36; - a[offset + 8] = (short)0xEF89; - a[offset + 9] = (short)0x0156; - a[offset + 10] = (short)(v1 >> 48); - a[offset + 11] = (short)(v1 >> 32); - a[offset + 12] = (short)(v1 >> 16); - a[offset + 13] = (short)(v1 >> 0); - a[offset + 14] = (short)(v2 >> 
16); - a[offset + 15] = (short)(v2 >> 0); - } else { - a[offset + 4] = (short)(v1 >> 0); - a[offset + 5] = (short)(v1 >> 16); - a[offset + 6] = (short)0xAB11; - a[offset + 7] = (short)0xCD36; - a[offset + 8] = (short)0xEF89; - a[offset + 9] = (short)0x0156; - a[offset + 10] = (short)(v1 >> 0); - a[offset + 11] = (short)(v1 >> 16); - a[offset + 12] = (short)(v1 >> 32); - a[offset + 13] = (short)(v1 >> 48); - a[offset + 14] = (short)(v2 >> 0); - a[offset + 15] = (short)(v2 >> 16); - } + a[offset + 4] = (short)(v1 >> 0); + a[offset + 5] = (short)(v1 >> 16); + a[offset + 6] = (short)0xAB11; + a[offset + 7] = (short)0xCD36; + a[offset + 8] = (short)0xEF89; + a[offset + 9] = (short)0x0156; + a[offset + 10] = (short)(v1 >> 0); + a[offset + 11] = (short)(v1 >> 16); + a[offset + 12] = (short)(v1 >> 32); + a[offset + 13] = (short)(v1 >> 48); + a[offset + 14] = (short)(v2 >> 0); + a[offset + 15] = (short)(v2 >> 16); + a[offset + 16] = (short)0xEFEF; + return new Object[]{ a }; + } + + @DontCompile + static Object[] test102RBE(short[] a, int offset, long v1, int v2, short v3) { + a[offset + 0] = (short)0x0000; + a[offset + 1] = (short)0xFFFF; + a[offset + 2] = v3; + a[offset + 3] = (short)0x4242; + a[offset + 4] = (short)(v1 >> 16); + a[offset + 5] = (short)(v1 >> 0); + a[offset + 6] = (short)0xAB11; + a[offset + 7] = (short)0xCD36; + a[offset + 8] = (short)0xEF89; + a[offset + 9] = (short)0x0156; + a[offset + 10] = (short)(v1 >> 48); + a[offset + 11] = (short)(v1 >> 32); + a[offset + 12] = (short)(v1 >> 16); + a[offset + 13] = (short)(v1 >> 0); + a[offset + 14] = (short)(v2 >> 16); + a[offset + 15] = (short)(v2 >> 0); + a[offset + 16] = (short)0xEFEF; + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "short\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_C_OF_CLASS, "short\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "12", + IRNode.STORE_I_OF_CLASS, "short\\\\[int:>=0] 
\\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", // Stores of constants can be merged + IRNode.STORE_L_OF_CLASS, "short\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"little-endian", "true"}) + @IR(counts = {IRNode.STORE_B_OF_CLASS, "short\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_C_OF_CLASS, "short\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "4", // 3 (+1 that goes into RC) + IRNode.STORE_I_OF_CLASS, "short\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "3", + IRNode.STORE_L_OF_CLASS, "short\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "2"}, + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"big-endian", "true"}) + static Object[] test102aBE(short[] a, int offset, long v1, int v2, short v3) { + a[offset + 0] = (short)0x0000; // store goes into RC + a[offset + 1] = (short)0xFFFF; + a[offset + 2] = v3; + a[offset + 3] = (short)0x4242; + a[offset + 4] = (short)(v1 >> 16); + a[offset + 5] = (short)(v1 >> 0); + a[offset + 6] = (short)0xAB11; + a[offset + 7] = (short)0xCD36; + a[offset + 8] = (short)0xEF89; + a[offset + 9] = (short)0x0156; + a[offset + 10] = (short)(v1 >> 48); + a[offset + 11] = (short)(v1 >> 32); + a[offset + 12] = (short)(v1 >> 16); + a[offset + 13] = (short)(v1 >> 0); + a[offset + 14] = (short)(v2 >> 16); + a[offset + 15] = (short)(v2 >> 0); a[offset + 16] = (short)0xEFEF; return new Object[]{ a }; } @@ -1274,28 +1417,16 @@ static Object[] test202R(int[] a, int offset, long v1, int v2) { a[offset + 1] = 0xFFFFFFFF; a[offset + 2] = v2; a[offset + 3] = 0x42424242; - if (IS_BIG_ENDIAN) { - a[offset + 4] = (int)(v1 >> 32); - a[offset + 5] = (int)(v1 >> 0); - } else { - a[offset + 4] = (int)(v1 >> 0); - a[offset + 5] = (int)(v1 >> 32); - } + a[offset + 4] = (int)(v1 >> 0); + a[offset + 5] = (int)(v1 >> 32); a[offset + 6] = 0xAB110129; 
a[offset + 7] = 0xCD360183; a[offset + 8] = 0xEF890173; a[offset + 9] = 0x01560124; - if (IS_BIG_ENDIAN) { - a[offset + 10] = (int)(v1 >> 32); - a[offset + 11] = (int)(v1 >> 0); - a[offset + 12] = (int)(v1 >> 32); - a[offset + 13] = (int)(v1 >> 0); - } else { - a[offset + 10] = (int)(v1 >> 0); - a[offset + 11] = (int)(v1 >> 32); - a[offset + 12] = (int)(v1 >> 0); - a[offset + 13] = (int)(v1 >> 32); - } + a[offset + 10] = (int)(v1 >> 0); + a[offset + 11] = (int)(v1 >> 32); + a[offset + 12] = (int)(v1 >> 0); + a[offset + 13] = (int)(v1 >> 32); a[offset + 14] = v2; a[offset + 15] = v2; a[offset + 16] = 0xEFEFEFEF; @@ -1307,34 +1438,85 @@ static Object[] test202R(int[] a, int offset, long v1, int v2) { IRNode.STORE_C_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_I_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "6", // 5 (+1 that goes into RC) IRNode.STORE_L_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "6"}, - applyIf = {"UseUnalignedAccesses", "true"}) + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"little-endian", "true"}) + @IR(counts = {IRNode.STORE_B_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_C_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "10", + IRNode.STORE_L_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "4"}, // Stores of constants can be merged + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"big-endian", "true"}) static Object[] test202a(int[] a, int offset, long v1, int v2) { a[offset + 0] = 0x00000000; // merged with store below, but also kept unchanged for RC a[offset + 1] = 0xFFFFFFFF; a[offset + 2] = v2; a[offset + 3] = 0x42424242; - if (IS_BIG_ENDIAN) { - a[offset + 
4] = (int)(v1 >> 32); - a[offset + 5] = (int)(v1 >> 0); - } else { - a[offset + 4] = (int)(v1 >> 0); - a[offset + 5] = (int)(v1 >> 32); - } + a[offset + 4] = (int)(v1 >> 0); + a[offset + 5] = (int)(v1 >> 32); a[offset + 6] = 0xAB110129; a[offset + 7] = 0xCD360183; a[offset + 8] = 0xEF890173; a[offset + 9] = 0x01560124; - if (IS_BIG_ENDIAN) { - a[offset + 10] = (int)(v1 >> 32); - a[offset + 11] = (int)(v1 >> 0); - a[offset + 12] = (int)(v1 >> 32); - a[offset + 13] = (int)(v1 >> 0); - } else { - a[offset + 10] = (int)(v1 >> 0); - a[offset + 11] = (int)(v1 >> 32); - a[offset + 12] = (int)(v1 >> 0); - a[offset + 13] = (int)(v1 >> 32); - } + a[offset + 10] = (int)(v1 >> 0); + a[offset + 11] = (int)(v1 >> 32); // Stores to +11 and +12 can be merged also on big-endian + a[offset + 12] = (int)(v1 >> 0); + a[offset + 13] = (int)(v1 >> 32); + a[offset + 14] = v2; + a[offset + 15] = v2; + a[offset + 16] = 0xEFEFEFEF; + return new Object[]{ a }; + } + + @DontCompile + static Object[] test202RBE(int[] a, int offset, long v1, int v2) { + a[offset + 0] = 0x00000000; + a[offset + 1] = 0xFFFFFFFF; + a[offset + 2] = v2; + a[offset + 3] = 0x42424242; + a[offset + 4] = (int)(v1 >> 32); + a[offset + 5] = (int)(v1 >> 0); + a[offset + 6] = 0xAB110129; + a[offset + 7] = 0xCD360183; + a[offset + 8] = 0xEF890173; + a[offset + 9] = 0x01560124; + a[offset + 10] = (int)(v1 >> 32); + a[offset + 11] = (int)(v1 >> 0); + a[offset + 12] = (int)(v1 >> 32); + a[offset + 13] = (int)(v1 >> 0); + a[offset + 14] = v2; + a[offset + 15] = v2; + a[offset + 16] = 0xEFEFEFEF; + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_C_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "10", + IRNode.STORE_L_OF_CLASS, "int\\\\[int:>=0] 
\\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "4"}, // Stores of constants can be merged + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"little-endian", "true"}) + @IR(counts = {IRNode.STORE_B_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_C_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "6", // 5 (+1 that goes into RC) + IRNode.STORE_L_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "6"}, + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"big-endian", "true"}) + static Object[] test202aBE(int[] a, int offset, long v1, int v2) { + a[offset + 0] = 0x00000000; // merged with store below, but also kept unchanged for RC + a[offset + 1] = 0xFFFFFFFF; + a[offset + 2] = v2; + a[offset + 3] = 0x42424242; + a[offset + 4] = (int)(v1 >> 32); + a[offset + 5] = (int)(v1 >> 0); + a[offset + 6] = 0xAB110129; + a[offset + 7] = 0xCD360183; + a[offset + 8] = 0xEF890173; + a[offset + 9] = 0x01560124; + a[offset + 10] = (int)(v1 >> 32); + a[offset + 11] = (int)(v1 >> 0); // Stores to +11 and +12 can be merged also on little-endian + a[offset + 12] = (int)(v1 >> 32); + a[offset + 13] = (int)(v1 >> 0); a[offset + 14] = v2; a[offset + 15] = v2; a[offset + 16] = 0xEFEFEFEF; @@ -1405,49 +1587,26 @@ static Object[] test400a(int[] a) { // 502a: during warmup never violate RangeCheck -> compile once with merged stores // but then after warmup violate RangeCheck -> recompile without merged stores static Object[] test500R(byte[] a, int offset, long v) { - if (IS_BIG_ENDIAN) { - int idx = 0; - try { - a[offset + 0] = (byte)(v >> 56); - idx = 1; - a[offset + 1] = (byte)(v >> 48); - idx = 2; - a[offset + 2] = (byte)(v >> 40); - idx = 3; - a[offset + 3] = (byte)(v >> 32); - idx = 4; - a[offset + 4] = (byte)(v >> 24); - idx = 5; - 
a[offset + 5] = (byte)(v >> 16); - idx = 6; - a[offset + 6] = (byte)(v >> 8); - idx = 7; - a[offset + 7] = (byte)(v >> 0); - idx = 8; - } catch (ArrayIndexOutOfBoundsException _) {} - return new Object[]{ a, new int[]{ idx } }; - } else { - int idx = 0; - try { - a[offset + 0] = (byte)(v >> 0); - idx = 1; - a[offset + 1] = (byte)(v >> 8); - idx = 2; - a[offset + 2] = (byte)(v >> 16); - idx = 3; - a[offset + 3] = (byte)(v >> 24); - idx = 4; - a[offset + 4] = (byte)(v >> 32); - idx = 5; - a[offset + 5] = (byte)(v >> 40); - idx = 6; - a[offset + 6] = (byte)(v >> 48); - idx = 7; - a[offset + 7] = (byte)(v >> 56); - idx = 8; - } catch (ArrayIndexOutOfBoundsException _) {} - return new Object[]{ a, new int[]{ idx } }; - } + int idx = 0; + try { + a[offset + 0] = (byte)(v >> 0); + idx = 1; + a[offset + 1] = (byte)(v >> 8); + idx = 2; + a[offset + 2] = (byte)(v >> 16); + idx = 3; + a[offset + 3] = (byte)(v >> 24); + idx = 4; + a[offset + 4] = (byte)(v >> 32); + idx = 5; + a[offset + 5] = (byte)(v >> 40); + idx = 6; + a[offset + 6] = (byte)(v >> 48); + idx = 7; + a[offset + 7] = (byte)(v >> 56); + idx = 8; + } catch (ArrayIndexOutOfBoundsException _) {} + return new Object[]{ a, new int[]{ idx } }; } @Test @@ -1455,55 +1614,158 @@ static Object[] test500R(byte[] a, int offset, long v) { IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // expect merged - applyIf = {"UseUnalignedAccesses", "true"}) + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"little-endian", "true"}) + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + 
IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, + applyIfPlatform = {"big-endian", "true"}) static Object[] test500a(byte[] a, int offset, long v) { - if (IS_BIG_ENDIAN) { - int idx = 0; - try { - a[offset + 0] = (byte)(v >> 56); - idx = 1; - a[offset + 1] = (byte)(v >> 48); - idx = 2; - a[offset + 2] = (byte)(v >> 40); - idx = 3; - a[offset + 3] = (byte)(v >> 32); - idx = 4; - a[offset + 4] = (byte)(v >> 24); - idx = 5; - a[offset + 5] = (byte)(v >> 16); - idx = 6; - a[offset + 6] = (byte)(v >> 8); - idx = 7; - a[offset + 7] = (byte)(v >> 0); - idx = 8; - } catch (ArrayIndexOutOfBoundsException _) {} - return new Object[]{ a, new int[]{ idx } }; - } else { - int idx = 0; - try { - a[offset + 0] = (byte)(v >> 0); - idx = 1; - a[offset + 1] = (byte)(v >> 8); - idx = 2; - a[offset + 2] = (byte)(v >> 16); - idx = 3; - a[offset + 3] = (byte)(v >> 24); - idx = 4; - a[offset + 4] = (byte)(v >> 32); - idx = 5; - a[offset + 5] = (byte)(v >> 40); - idx = 6; - a[offset + 6] = (byte)(v >> 48); - idx = 7; - a[offset + 7] = (byte)(v >> 56); - idx = 8; - } catch (ArrayIndexOutOfBoundsException _) {} - return new Object[]{ a, new int[]{ idx } }; - } + int idx = 0; + try { + a[offset + 0] = (byte)(v >> 0); + idx = 1; + a[offset + 1] = (byte)(v >> 8); + idx = 2; + a[offset + 2] = (byte)(v >> 16); + idx = 3; + a[offset + 3] = (byte)(v >> 24); + idx = 4; + a[offset + 4] = (byte)(v >> 32); + idx = 5; + a[offset + 5] = (byte)(v >> 40); + idx = 6; + a[offset + 6] = (byte)(v >> 48); + idx = 7; + a[offset + 7] = (byte)(v >> 56); + idx = 8; + } catch (ArrayIndexOutOfBoundsException _) {} + return new Object[]{ a, new int[]{ idx } }; } @Test @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", // No optimization because of too many RangeChecks + IRNode.STORE_C_OF_CLASS, 
"byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}) + static Object[] test501a(byte[] a, int offset, long v) { + int idx = 0; + try { + a[offset + 0] = (byte)(v >> 0); + idx = 1; + a[offset + 1] = (byte)(v >> 8); + idx = 2; + a[offset + 2] = (byte)(v >> 16); + idx = 3; + a[offset + 3] = (byte)(v >> 24); + idx = 4; + a[offset + 4] = (byte)(v >> 32); + idx = 5; + a[offset + 5] = (byte)(v >> 40); + idx = 6; + a[offset + 6] = (byte)(v >> 48); + idx = 7; + a[offset + 7] = (byte)(v >> 56); + idx = 8; + } catch (ArrayIndexOutOfBoundsException _) {} + return new Object[]{ a, new int[]{ idx } }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", // No optimization because of too many RangeChecks + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}) + static Object[] test502a(byte[] a, int offset, long v) { + int idx = 0; + try { + a[offset + 0] = (byte)(v >> 0); + idx = 1; + a[offset + 1] = (byte)(v >> 8); + idx = 2; + a[offset + 2] = (byte)(v >> 16); + idx = 3; + a[offset + 3] = (byte)(v >> 24); + idx = 4; + a[offset + 4] = (byte)(v >> 32); + idx = 5; + a[offset + 5] = (byte)(v >> 40); + idx = 6; + a[offset + 6] = (byte)(v >> 48); + idx = 7; + a[offset + 7] = (byte)(v >> 56); + idx = 8; + } catch (ArrayIndexOutOfBoundsException _) {} + return new Object[]{ a, new int[]{ idx } }; + } + + @DontCompile + // The 500-series has all the same code, but is executed with different inputs: + // 500a: never violate a 
RangeCheck -> expect will always merge stores + // 501a: randomly violate RangeCheck, also during warmup -> never merge stores + // 502a: during warmup never violate RangeCheck -> compile once with merged stores + // but then after warmup violate RangeCheck -> recompile without merged stores + static Object[] test500RBE(byte[] a, int offset, long v) { + int idx = 0; + try { + a[offset + 0] = (byte)(v >> 56); + idx = 1; + a[offset + 1] = (byte)(v >> 48); + idx = 2; + a[offset + 2] = (byte)(v >> 40); + idx = 3; + a[offset + 3] = (byte)(v >> 32); + idx = 4; + a[offset + 4] = (byte)(v >> 24); + idx = 5; + a[offset + 5] = (byte)(v >> 16); + idx = 6; + a[offset + 6] = (byte)(v >> 8); + idx = 7; + a[offset + 7] = (byte)(v >> 0); + idx = 8; + } catch (ArrayIndexOutOfBoundsException _) {} + return new Object[]{ a, new int[]{ idx } }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, + applyIfPlatform = {"little-endian", "true"}) + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", // for RangeCheck trap + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // expect merged + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"big-endian", "true"}) + static Object[] test500aBE(byte[] a, int offset, long v) { + int idx = 0; + try { + a[offset + 0] = 
(byte)(v >> 56); + idx = 1; + a[offset + 1] = (byte)(v >> 48); + idx = 2; + a[offset + 2] = (byte)(v >> 40); + idx = 3; + a[offset + 3] = (byte)(v >> 32); + idx = 4; + a[offset + 4] = (byte)(v >> 24); + idx = 5; + a[offset + 5] = (byte)(v >> 16); + idx = 6; + a[offset + 6] = (byte)(v >> 8); + idx = 7; + a[offset + 7] = (byte)(v >> 0); + idx = 8; + } catch (ArrayIndexOutOfBoundsException _) {} + return new Object[]{ a, new int[]{ idx } }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, @@ -1514,54 +1776,31 @@ static Object[] test500a(byte[] a, int offset, long v) { IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, applyIf = {"UseUnalignedAccesses", "true"}, applyIfPlatform = {"big-endian", "true"}) - static Object[] test501a(byte[] a, int offset, long v) { - if (IS_BIG_ENDIAN) { - int idx = 0; - try { - a[offset + 0] = (byte)(v >> 56); - idx = 1; - a[offset + 1] = (byte)(v >> 48); - idx = 2; - a[offset + 2] = (byte)(v >> 40); - idx = 3; - a[offset + 3] = (byte)(v >> 32); - idx = 4; - a[offset + 4] = (byte)(v >> 24); - idx = 5; - a[offset + 5] = (byte)(v >> 16); - idx = 6; - a[offset + 6] = (byte)(v >> 8); // 2 lowest StoreB are merged. 7th StoreB is - idx = 7; // needed if RC for a[offset + 7] fails. 
- a[offset + 7] = (byte)(v >> 0); - idx = 8; - } catch (ArrayIndexOutOfBoundsException _) {} - return new Object[]{ a, new int[]{ idx } }; - } else { - int idx = 0; - try { - a[offset + 0] = (byte)(v >> 0); - idx = 1; - a[offset + 1] = (byte)(v >> 8); - idx = 2; - a[offset + 2] = (byte)(v >> 16); - idx = 3; - a[offset + 3] = (byte)(v >> 24); - idx = 4; - a[offset + 4] = (byte)(v >> 32); - idx = 5; - a[offset + 5] = (byte)(v >> 40); - idx = 6; - a[offset + 6] = (byte)(v >> 48); - idx = 7; - a[offset + 7] = (byte)(v >> 56); - idx = 8; - } catch (ArrayIndexOutOfBoundsException _) {} - return new Object[]{ a, new int[]{ idx } }; - } + static Object[] test501aBE(byte[] a, int offset, long v) { + int idx = 0; + try { + a[offset + 0] = (byte)(v >> 56); + idx = 1; + a[offset + 1] = (byte)(v >> 48); + idx = 2; + a[offset + 2] = (byte)(v >> 40); + idx = 3; + a[offset + 3] = (byte)(v >> 32); + idx = 4; + a[offset + 4] = (byte)(v >> 24); + idx = 5; + a[offset + 5] = (byte)(v >> 16); + idx = 6; + a[offset + 6] = (byte)(v >> 8); + idx = 7; + a[offset + 7] = (byte)(v >> 0); + idx = 8; + } catch (ArrayIndexOutOfBoundsException _) {} + return new Object[]{ a, new int[]{ idx } }; } @Test - @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", // No optimization because of too many RangeChecks + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, @@ -1572,50 +1811,27 @@ static Object[] test501a(byte[] a, int offset, long v) { IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, applyIf = 
{"UseUnalignedAccesses", "true"}, applyIfPlatform = {"big-endian", "true"}) - static Object[] test502a(byte[] a, int offset, long v) { - if (IS_BIG_ENDIAN) { - int idx = 0; - try { - a[offset + 0] = (byte)(v >> 56); - idx = 1; - a[offset + 1] = (byte)(v >> 48); - idx = 2; - a[offset + 2] = (byte)(v >> 40); - idx = 3; - a[offset + 3] = (byte)(v >> 32); - idx = 4; - a[offset + 4] = (byte)(v >> 24); - idx = 5; - a[offset + 5] = (byte)(v >> 16); - idx = 6; - a[offset + 6] = (byte)(v >> 8); // 2 lowest StoreB are merged. 7th StoreB is - idx = 7; // needed if RC for a[offset + 7] fails. - a[offset + 7] = (byte)(v >> 0); - idx = 8; - } catch (ArrayIndexOutOfBoundsException _) {} - return new Object[]{ a, new int[]{ idx } }; - } else { - int idx = 0; - try { - a[offset + 0] = (byte)(v >> 0); - idx = 1; - a[offset + 1] = (byte)(v >> 8); - idx = 2; - a[offset + 2] = (byte)(v >> 16); - idx = 3; - a[offset + 3] = (byte)(v >> 24); - idx = 4; - a[offset + 4] = (byte)(v >> 32); - idx = 5; - a[offset + 5] = (byte)(v >> 40); - idx = 6; - a[offset + 6] = (byte)(v >> 48); - idx = 7; - a[offset + 7] = (byte)(v >> 56); - idx = 8; - } catch (ArrayIndexOutOfBoundsException _) {} - return new Object[]{ a, new int[]{ idx } }; - } + static Object[] test502aBE(byte[] a, int offset, long v) { + int idx = 0; + try { + a[offset + 0] = (byte)(v >> 56); + idx = 1; + a[offset + 1] = (byte)(v >> 48); + idx = 2; + a[offset + 2] = (byte)(v >> 40); + idx = 3; + a[offset + 3] = (byte)(v >> 32); + idx = 4; + a[offset + 4] = (byte)(v >> 24); + idx = 5; + a[offset + 5] = (byte)(v >> 16); + idx = 6; + a[offset + 6] = (byte)(v >> 8); + idx = 7; + a[offset + 7] = (byte)(v >> 0); + idx = 8; + } catch (ArrayIndexOutOfBoundsException _) {} + return new Object[]{ a, new int[]{ idx } }; } @DontCompile @@ -1685,21 +1901,41 @@ static Object[] test700a(int[] a, long v1) { @DontCompile static Object[] test800R(byte[] a, int offset, long v) { - if (IS_BIG_ENDIAN) { - a[offset + 0] = (byte)(v >> 40); - a[offset + 1] = 
(byte)(v >> 32); - a[offset + 2] = (byte)(v >> 24); - a[offset + 3] = (byte)(v >> 16); - a[offset + 4] = (byte)(v >> 8); - a[offset + 5] = (byte)(v >> 0); - } else { - a[offset + 0] = (byte)(v >> 0); - a[offset + 1] = (byte)(v >> 8); - a[offset + 2] = (byte)(v >> 16); - a[offset + 3] = (byte)(v >> 24); - a[offset + 4] = (byte)(v >> 32); - a[offset + 5] = (byte)(v >> 40); - } + a[offset + 0] = (byte)(v >> 0); + a[offset + 1] = (byte)(v >> 8); + a[offset + 2] = (byte)(v >> 16); + a[offset + 3] = (byte)(v >> 24); + a[offset + 4] = (byte)(v >> 32); + a[offset + 5] = (byte)(v >> 40); + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "6", + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}) + static Object[] test800a(byte[] a, int offset, long v) { + // Merge attempts begin at the lowest store in the Memory chain. + // Candidates are found following the chain. The list is trimmed to a + // power of 2 length by removing higher stores. + a[offset + 0] = (byte)(v >> 0); // Removed from candidate list + a[offset + 1] = (byte)(v >> 8); // Removed from candidate list + a[offset + 2] = (byte)(v >> 16); // The 4 following stores are on the candidate list. + a[offset + 3] = (byte)(v >> 24); // The current logic does not merge them + a[offset + 4] = (byte)(v >> 32); // since it would require shifting the input. 
+ a[offset + 5] = (byte)(v >> 40); + return new Object[]{ a }; + } + + @DontCompile + static Object[] test800RBE(byte[] a, int offset, long v) { + a[offset + 0] = (byte)(v >> 40); + a[offset + 1] = (byte)(v >> 32); + a[offset + 2] = (byte)(v >> 24); + a[offset + 3] = (byte)(v >> 16); + a[offset + 4] = (byte)(v >> 8); + a[offset + 5] = (byte)(v >> 0); return new Object[]{ a }; } @@ -1715,25 +1951,16 @@ static Object[] test800R(byte[] a, int offset, long v) { IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, applyIf = {"UseUnalignedAccesses", "true"}, applyIfPlatform = {"big-endian", "true"}) - static Object[] test800a(byte[] a, int offset, long v) { + static Object[] test800aBE(byte[] a, int offset, long v) { // Merge attempts begin at the lowest store in the Memory chain. // Candidates are found following the chain. The list is trimmed to a // power of 2 length by removing higher stores. - if (IS_BIG_ENDIAN) { - a[offset + 0] = (byte)(v >> 40); // Removed from candidate list - a[offset + 1] = (byte)(v >> 32); // Removed from candidate list - a[offset + 2] = (byte)(v >> 24); // The 4 following stores are on the candidate list - a[offset + 3] = (byte)(v >> 16); // and they are successfully merged. - a[offset + 4] = (byte)(v >> 8); - a[offset + 5] = (byte)(v >> 0); - } else { - a[offset + 0] = (byte)(v >> 0); // Removed from candidate list - a[offset + 1] = (byte)(v >> 8); // Removed from candidate list - a[offset + 2] = (byte)(v >> 16); // The 4 following stores are on the candidate list. - a[offset + 3] = (byte)(v >> 24); // The current logic does not merge them - a[offset + 4] = (byte)(v >> 32); // since it would require shifting the input. 
- a[offset + 5] = (byte)(v >> 40); - } + a[offset + 0] = (byte)(v >> 40); // Removed from candidate list + a[offset + 1] = (byte)(v >> 32); // Removed from candidate list + a[offset + 2] = (byte)(v >> 24); // The 4 following stores are on the candidate list + a[offset + 3] = (byte)(v >> 16); // and they are successfully merged on big endian platforms. + a[offset + 4] = (byte)(v >> 8); + a[offset + 5] = (byte)(v >> 0); return new Object[]{ a }; } } From fc870e2b6c23695c9ceb0cfc5f758c521b2912a9 Mon Sep 17 00:00:00 2001 From: Richard Reingruber Date: Tue, 4 Jun 2024 15:52:13 +0200 Subject: [PATCH 9/9] Feedback Emanuel --- src/hotspot/share/opto/memnode.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index f426a4200df1c..415540a4c416b 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -3300,14 +3300,12 @@ Node* MergePrimitiveArrayStores::make_merged_input_value(const Node_List& merge_ // | | // _store first // -#ifdef VM_LITTLE_ENDIAN Node* hi = _store->in(MemNode::ValueIn); Node* lo = first->in(MemNode::ValueIn); -#else // VM_LITTLE_ENDIAN +#ifndef VM_LITTLE_ENDIAN // `_store` and `first` are swapped in the diagram above - Node* hi = first->in(MemNode::ValueIn); - Node* lo = _store->in(MemNode::ValueIn); -#endif // VM_LITTLE_ENDIAN + swap(hi, lo); +#endif // !VM_LITTLE_ENDIAN Node const* hi_base; jint hi_shift; merged_input_value = lo;