diff --git a/compiler/CHANGELOG.md b/compiler/CHANGELOG.md
index bf34c6c73a15..55ab708dda28 100644
--- a/compiler/CHANGELOG.md
+++ b/compiler/CHANGELOG.md
@@ -4,6 +4,8 @@ This changelog summarizes newly introduced optimizations and other compiler rela
 
 ## GraalVM for JDK 23 (Internal Version 24.1.0)
 * (GR-50352): Added `-Djdk.graal.PrintPropertiesAll` to make `-XX:+JVMCIPrintProperties` show all Graal options.
+* (GR-25968): New optimization that reduces code size on AMD64 by emitting smaller jump instructions when the displacement fits in one byte.
+  Enabled by default for Native Image O1-O3; disabled elsewhere. Use `-Djdk.graal.OptimizeLongJumps=true` to enable it explicitly.
 
 ## GraalVM for JDK 22 (Internal Version 24.0.0)
 * (GR-49876): Added `-Dgraal.PrintIntrinsics=true` to log the intrinsics used by Graal in the current runtime.
diff --git a/compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/asm/amd64/test/OptimizeLongJumpsTest.java b/compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/asm/amd64/test/OptimizeLongJumpsTest.java
new file mode 100644
index 000000000000..bef28e003ffc
--- /dev/null
+++ b/compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/asm/amd64/test/OptimizeLongJumpsTest.java
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.graal.compiler.asm.amd64.test;
+
+import java.util.Arrays;
+
+import org.junit.Assume;
+import org.junit.Before;
+import org.junit.Test;
+
+import jdk.graal.compiler.asm.amd64.AMD64Assembler;
+import jdk.graal.compiler.core.common.GraalOptions;
+import jdk.graal.compiler.core.test.GraalCompilerTest;
+import jdk.graal.compiler.hotspot.CompilerConfigurationFactory;
+import jdk.graal.compiler.nodes.StructuredGraph;
+import jdk.graal.compiler.nodes.StructuredGraph.AllowAssumptions;
+import jdk.graal.compiler.options.OptionValues;
+import jdk.vm.ci.amd64.AMD64;
+import jdk.vm.ci.code.InstalledCode;
+import jdk.vm.ci.code.InvalidInstalledCodeException;
+
+/**
+ * Regression tests for checking that
+ * {@link jdk.graal.compiler.core.common.GraalOptions#OptimizeLongJumps} replaces {@code jmp/jcc}
+ * instructions with {@code jmpb/jccb}.
+ */
+public class OptimizeLongJumpsTest extends GraalCompilerTest {
+
+    @Before
+    public void checkAMD64() {
+        Assume.assumeTrue("skipping AMD64 specific test", getTarget().arch instanceof AMD64);
+    }
+
+    public static int sideeffect = 0;
+
+    public static int snippet01(int bound) {
+        for (int i = 0; i < bound; i++) {
+            sideeffect += i;
+        }
+        sideeffect = 0;
+        return sideeffect;
+    }
+
+    public static int snippet02(boolean b, int x1, int x2) {
+        if (b) {
+            return x1;
+        }
+        return x2;
+    }
+
+    @Test
+    public void test01() throws InvalidInstalledCodeException {
+        OptionValues options = new OptionValues(getInitialOptions(), AMD64Assembler.Options.UseBranchesWithin32ByteBoundary, true, CompilerConfigurationFactory.Options.CompilerConfiguration,
+                        "economy");
+        testOptimizeLongJumps("snippet01", options, 42);
+    }
+
+    @Test
+    public void test02() throws InvalidInstalledCodeException {
+        OptionValues options = new OptionValues(getInitialOptions(), AMD64Assembler.Options.UseBranchesWithin32ByteBoundary, true, CompilerConfigurationFactory.Options.CompilerConfiguration,
+                        "economy");
+        testOptimizeLongJumps("snippet02", options, true, 1, 2);
+    }
+
+    private void testOptimizeLongJumps(String method, OptionValues opts, Object... params) throws InvalidInstalledCodeException {
+        OptionValues optionsDefault = new OptionValues(opts, GraalOptions.OptimizeLongJumps, false);
+        StructuredGraph graphDefault = parseEager(method, AllowAssumptions.NO, optionsDefault);
+        InstalledCode codeDefault = null;
+
+        OptionValues optionsOptimized = new OptionValues(opts, GraalOptions.OptimizeLongJumps, true);
+        StructuredGraph graphOptimized = parseEager(method, AllowAssumptions.NO, optionsOptimized);
+        InstalledCode codeOptimized = null;
+
+        for (int i = 0; i < 3; i++) {
+
+            /*
+             * Why we use a loop: The optimization is considered successful if there are fewer
+             * jmp/jcc instructions compared to the unoptimized code. To assert this condition,
+             * checkCode counts long jmp/jcc opcodes in the raw code byte arrays. Under rare
+             * circumstances, bytes from constants, displacements, etc. can "look" like the
+             * opcodes we are searching for, which can lead to false counts. If the success
+             * condition does not hold, we redo the code emission to rule out such false
+             * positives and only fail if the condition does not hold repeatedly.
+             */
+
+            codeDefault = getCode(graphDefault.method(), graphDefault, true, true, optionsDefault);
+            Object resultDefault = codeDefault.executeVarargs(params);
+
+            codeOptimized = getCode(graphOptimized.method(), graphOptimized, true, true, optionsOptimized);
+            Object resultOptimized = codeOptimized.executeVarargs(params);
+
+            assertTrue(String.format("Optimized code should behave identically!\nResult (default): %d | Result (optimized): %d", resultDefault, resultOptimized), resultDefault.equals(resultOptimized));
+            if (checkCode(codeDefault, codeOptimized)) {
+                return;
+            }
+        }
+        fail(String.format("Optimized code should have fewer long jumps!\n\tDefault code: %s\n\tOptimized code: %s", byteArrayToHexArray(codeDefault.getCode()),
+                        byteArrayToHexArray(codeOptimized.getCode())));
+    }
+
+    private static boolean checkCode(InstalledCode codeDefault, InstalledCode codeOptimized) {
+        byte[] bytesDefault = codeDefault.getCode();
+        byte[] bytesOptimized = codeOptimized.getCode();
+
+        if (bytesDefault.length > bytesOptimized.length) {
+            // code size reduction, so optimization must have worked
+            return true;
+        }
+
+        return countLongJumpsHeuristically(bytesDefault) > countLongJumpsHeuristically(bytesOptimized);
+    }
+
+    private static int countLongJumpsHeuristically(byte[] code) {
+        /*
+         * Counts opcodes for jmp and jcc in a raw code byte array. If a non-opcode byte looks like
+         * a jmp / jcc opcode, this would lead to false counts.
+         */
+        int longJumps = 0;
+        for (int i = 0; i < code.length - 1; i++) {
+            if (isLongJmp(code[i]) || isLongJcc(code[i], code[i + 1])) {
+                longJumps++;
+            }
+        }
+        return longJumps;
+    }
+
+    public static boolean isLongJmp(byte b) {
+        // compare unsigned: 0xE9 does not fit into a signed byte
+        return (b & 0xFF) == 0xE9;
+    }
+
+    public static boolean isLongJcc(byte b0, byte b1) {
+        return b0 == 0x0F && (b1 & 0xF0) == 0x80;
+    }
+
+    private static String byteArrayToHexArray(byte[] code) {
+        String[] hex = new String[code.length];
+
+        for (int i = 0; i < code.length; i++) {
+            hex[i] = byteToHex(code[i]);
+        }
+
+        return Arrays.toString(hex);
+    }
+
+    private static String byteToHex(byte num) {
+        char[] hexDigits = new char[2];
+        hexDigits[0] = Character.forDigit((num >> 4) & 0xF, 16);
+        hexDigits[1] = Character.forDigit((num & 0xF), 16);
+        return new String(hexDigits);
+    }
+}
diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/asm/amd64/AMD64Assembler.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/asm/amd64/AMD64Assembler.java index 3f7533443987..f12602af3818 100644 --- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/asm/amd64/AMD64Assembler.java +++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/asm/amd64/AMD64Assembler.java @@ -77,10 +77,15 @@ import static jdk.vm.ci.amd64.AMD64.CPUFeature.GFNI; import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD; +import java.util.ArrayList; import java.util.EnumSet; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import jdk.graal.compiler.asm.BranchTargetOutOfBoundsException; import jdk.graal.compiler.asm.Label; import jdk.graal.compiler.asm.amd64.AVXKind.AVXSize; +import jdk.graal.compiler.core.common.GraalOptions; import jdk.graal.compiler.core.common.Stride; import jdk.graal.compiler.core.common.calc.Condition; import jdk.graal.compiler.debug.Assertions; @@ -89,6 +94,9 @@ import jdk.graal.compiler.options.OptionKey; import jdk.graal.compiler.options.OptionType; import jdk.graal.compiler.options.OptionValues; + +import org.graalvm.collections.EconomicSet; + import jdk.vm.ci.amd64.AMD64; import jdk.vm.ci.amd64.AMD64.CPUFeature; import jdk.vm.ci.amd64.AMD64Kind; @@ -111,6 +119,7 @@ public static class Options { } private final boolean useBranchesWithin32ByteBoundary; + private boolean optimizeLongJumps; /** * Constructs an assembler for the AMD64 architecture.
@@ -118,11 +127,13 @@ public static class Options { public AMD64Assembler(TargetDescription target) { super(target); useBranchesWithin32ByteBoundary = false; + optimizeLongJumps = GraalOptions.OptimizeLongJumps.getDefaultValue(); } public AMD64Assembler(TargetDescription target, OptionValues optionValues) { super(target); useBranchesWithin32ByteBoundary = Options.UseBranchesWithin32ByteBoundary.getValue(optionValues); + optimizeLongJumps = GraalOptions.OptimizeLongJumps.getValue(optionValues); } public AMD64Assembler(TargetDescription target, OptionValues optionValues, boolean hasIntelJccErratum) { @@ -132,6 +143,7 @@ public AMD64Assembler(TargetDescription target, OptionValues optionValues, boole } else { useBranchesWithin32ByteBoundary = hasIntelJccErratum; } + optimizeLongJumps = GraalOptions.OptimizeLongJumps.getValue(optionValues); } /** @@ -2834,8 +2846,8 @@ protected final int mitigateJCCErratum(int position, int bytesToEmit) { } public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) { - final int shortSize = 2; - final int longSize = 6; + final int shortSize = JumpType.JCCB.instrSize; + final int longSize = JumpType.JCC.instrSize; long disp = jumpTarget - position(); if (!forceDisp32 && isByte(disp - shortSize)) { @@ -2844,8 +2856,10 @@ public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) { disp = jumpTarget - position(); if (isByte(disp - shortSize)) { // 0111 tttn #8-bit disp + int pos = position(); emitByte(0x70 | cc.getValue()); emitByte((int) ((disp - shortSize) & 0xFF)); + trackJump(JumpType.JCCB, pos); return; } } @@ -2853,10 +2867,12 @@ public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) { // 0000 1111 1000 tttn #32-bit disp assert forceDisp32 || isInt(disp - longSize) : "must be 32bit offset (call4)"; mitigateJCCErratum(longSize); + int pos = position(); disp = jumpTarget - position(); emitByte(0x0F); emitByte(0x80 | cc.getValue()); emitInt((int) (disp - longSize)); + trackJump(JumpType.JCC, pos); } /** @@ -2865,15 +2881,19 @@ public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) { */ public final void jcc(ConditionFlag cc) { annotatePatchingImmediate(2, 4); + int pos = position(); emitByte(0x0F); emitByte(0x80 | cc.getValue()); emitInt(0); + trackJump(JumpType.JCC, pos); } public final void jcc(ConditionFlag cc, Label l) { assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc"; if (l.isBound()) { jcc(cc, l.position(), false); + } else if (canUseShortJump(nextJumpIdx)) { + jccb(cc, l); } else { mitigateJCCErratum(6); // Note: could eliminate cond. 
jumps to this jump if condition @@ -2881,9 +2901,11 @@ public final void jcc(ConditionFlag cc, Label l) { // Note: use jccb() if label to be bound is very close to get // an 8-bit displacement l.addPatchAt(position(), this); + int pos = position(); emitByte(0x0F); emitByte(0x80 | cc.getValue()); emitInt(0); + trackJump(JumpType.JCC, pos); } } @@ -2892,8 +2914,9 @@ public final void jccb(ConditionFlag cc, Label l) { jcc(cc, l); return; } - final int shortSize = 2; + final int shortSize = JumpType.JCCB.instrSize; mitigateJCCErratum(shortSize); + int pos = position(); if (l.isBound()) { int entry = l.position(); assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp: " + (entry - (position() + shortSize)); @@ -2901,10 +2924,12 @@ public final void jccb(ConditionFlag cc, Label l) { // 0111 tttn #8-bit disp emitByte(0x70 | cc.getValue()); emitByte((int) ((disp - shortSize) & 0xFF)); + trackJump(JumpType.JCCB, pos); } else { l.addPatchAt(position(), this); emitByte(0x70 | cc.getValue()); emitByte(0); + trackJump(JumpType.JCCB, pos); } } @@ -2925,8 +2950,8 @@ public final void jcc(ConditionFlag cc, Label branchTarget, boolean isShortJmp) * @return the position where the jmp instruction starts. */ public final int jmp(int jumpTarget, boolean forceDisp32) { - final int shortSize = 2; - final int longSize = 5; + final int shortSize = JumpType.JMPB.instrSize; + final int longSize = JumpType.JMP.instrSize; // For long jmp, the jmp instruction will cross the jcc-erratum-mitigation-boundary when the // current position is between [0x1b, 0x1f]. For short jmp [0x1e, 0x1f], which is covered by // the long jmp triggering range. @@ -2938,6 +2963,8 @@ public final int jmp(int jumpTarget, boolean forceDisp32) { if (isByte(disp - shortSize)) { emitByte(0xEB); emitByte((int) ((disp - shortSize) & 0xFF)); + trackJump(JumpType.JMPB, pos); + return pos; } } @@ -2947,6 +2974,7 @@ public final int jmp(int jumpTarget, boolean forceDisp32) { long disp = jumpTarget - pos; emitByte(0xE9); emitInt((int) (disp - longSize)); + trackJump(JumpType.JMP, pos); return pos; } @@ -2959,15 +2987,19 @@ public void halt() { public final void jmp(Label l) { if (l.isBound()) { jmp(l.position(), false); + } else if (canUseShortJump(nextJumpIdx)) { + jmpb(l); } else { // By default, forward jumps are always 32-bit displacements, since // we can't yet know where the label will be bound. If you're sure that // the forward jump will not run beyond 256 bytes, use jmpb to // force an 8-bit displacement. 
mitigateJCCErratum(5); - l.addPatchAt(position(), this); + int pos = position(); + l.addPatchAt(pos, this); emitByte(0xE9); emitInt(0); + trackJump(JumpType.JMP, pos); } } @@ -3052,18 +3084,22 @@ public final void jmpb(Label l) { jmp(l); return; } - final int shortSize = 2; + final int shortSize = JumpType.JMPB.instrSize; mitigateJCCErratum(shortSize); if (l.isBound()) { // Displacement is relative to byte just after jmpb instruction - int displacement = l.position() - position() - shortSize; + int pos = position(); + int displacement = l.position() - pos - shortSize; GraalError.guarantee(isByte(displacement), "Displacement too large to be encoded as a byte: %d", displacement); emitByte(0xEB); emitByte(displacement & 0xFF); + trackJump(JumpType.JMPB, pos); } else { - l.addPatchAt(position(), this); + int pos = position(); + l.addPatchAt(pos, this); emitByte(0xEB); emitByte(0); + trackJump(JumpType.JMPB, pos); } } @@ -4931,7 +4967,9 @@ protected final void patchJumpTarget(int branch, int branchTarget) { * Since a wrongly patched short branch can potentially lead to working but really bad * behaving code we should always fail with an exception instead of having an assert. */ - GraalError.guarantee(isByte(imm8), "Displacement too large to be encoded as a byte: %d", imm8); + if (!isByte(imm8)) { + throw new BranchTargetOutOfBoundsException(true, "Displacement too large to be encoded as a byte: %d", imm8); + } emitByte(imm8, branch + 1); } else { @@ -5651,4 +5689,204 @@ public final void vfpclassss(Register dst, Register mask, Register src, int imm8 emitModRM(dst, src); emitByte(imm8); } + + /** + * Wraps information for different jump instructions, such as the instruction and displacement + * size and the position of the displacement within the instruction. + */ + public enum JumpType { + JMP(5, 1, 4), + JMPB(2, 1, 1), + JCC(6, 2, 4), + JCCB(2, 1, 1); + + JumpType(int instrSize, int dispPos, int dispSize) { + assert instrSize == dispPos + dispSize : "Invalid JumpInfo: instrSize=" + instrSize + ", dispPos=" + dispPos + ", dispSize=" + dispSize; + this.instrSize = instrSize; + this.dispPos = dispPos; + this.dispSize = dispSize; + } + + /** + * Size of the instruction in bytes. + */ + public final int instrSize; + /** + * Size of the jump displacement in bytes. + */ + public final int dispSize; + /** + * Position (in bytes) of the jump displacement within the instruction. + */ + public final int dispPos; + } + + /** + * Collects information about emitted jumps. Used for optimizing long jumps in a second code + * emit pass. + */ + public static class JumpInfo { + /** + * Accounts for unknown alignments when deciding if a forward jump should be emitted with a + * single byte displacement (called "short" jump). Only forward jumps with displacements < + * (127 - {@link #ALIGNMENT_COMPENSATION_HEURISTIC}) are emitted as short jumps. + */ + private static final int ALIGNMENT_COMPENSATION_HEURISTIC = 32; + + /** + * The index of this jump within the emitted code. Corresponds to the emit order, e.g., + * {@code idx = 5} denotes the 6th emitted jump. + */ + public final int jumpIdx; + + /** + * The position (bytes from the beginning of the method) of the instruction. + */ + public final int instrPos; + /** + * The type of the jump instruction. + */ + public final JumpType type; + /** + * The position (bytes from the beginning of the method) of the displacement. 
+ */ + public int displacementPosition; + + private final AMD64Assembler asm; + + JumpInfo(int jumpIdx, JumpType type, int pos, AMD64Assembler asm) { + this.jumpIdx = jumpIdx; + this.type = type; + this.instrPos = pos; + this.displacementPosition = pos + type.dispPos; + this.asm = asm; + } + + /** + * Read the jump displacement from the code buffer. If the corresponding jump label has not + * been bound yet, the displacement is still uninitialized (=0). + */ + public int getDisplacement() { + switch (type.dispSize) { + case Byte.BYTES: + return asm.getByte(instrPos + type.dispPos); + case Integer.BYTES: + return asm.getInt(instrPos + type.dispPos); + default: + throw new RuntimeException("Unhandled jump displacement size: " + type.dispSize); + } + } + + /** + * Returns true if this jump fulfills all following conditions: + * + *
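A note on the size arithmetic behind this patch: per the `JumpType` constants above, a near `jmp` is 5 bytes (opcode `0xE9` plus a 32-bit displacement) and a `jcc` is 6 bytes (`0x0F 0x8x` plus rel32), while the short forms `jmpb` (`0xEB` + rel8) and `jccb` (`0x7x` + rel8) are 2 bytes each, so every shortened jump saves 3 or 4 bytes. For not-yet-bound forward jumps the patch is deliberately conservative, only shortening when the expected displacement stays below 127 minus the 32-byte `ALIGNMENT_COMPENSATION_HEURISTIC`, since later alignment padding is unknown at emit time. The following standalone sketch is not part of the patch (the class and method names are illustrative only); it just restates the displacement check and the per-jump savings implied by those encodings.

```java
// Illustrative sketch, not Graal code: mirrors the encoding sizes captured by JumpType.
public final class ShortJumpSavingsSketch {

    /** True if the displacement can be encoded as a signed 8-bit rel8. */
    static boolean fitsInByte(long disp) {
        return disp >= Byte.MIN_VALUE && disp <= Byte.MAX_VALUE;
    }

    /** Bytes saved when an unconditional jmp (5 bytes) can be shortened to jmpb (2 bytes). */
    static int jmpSavings(long disp) {
        return fitsInByte(disp) ? 5 - 2 : 0;
    }

    /** Bytes saved when a conditional jcc (6 bytes) can be shortened to jccb (2 bytes). */
    static int jccSavings(long disp) {
        return fitsInByte(disp) ? 6 - 2 : 0;
    }

    public static void main(String[] args) {
        System.out.println(jmpSavings(100));  // 3: displacement fits in rel8
        System.out.println(jccSavings(-90));  // 4: backward jump, still fits in rel8
        System.out.println(jmpSavings(300));  // 0: needs the long form with rel32
    }
}
```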