diff --git a/compiler/CHANGELOG.md b/compiler/CHANGELOG.md index bf34c6c73a15..55ab708dda28 100644 --- a/compiler/CHANGELOG.md +++ b/compiler/CHANGELOG.md @@ -4,6 +4,8 @@ This changelog summarizes newly introduced optimizations and other compiler rela ## GraalVM for JDK 23 (Internal Version 24.1.0) * (GR-50352): Added `-Djdk.graal.PrintPropertiesAll` to make `-XX:+JVMCIPrintProperties` show all Graal options. +* (GR-25968): New optimization for reducing code size on AMD64, by emitting smaller jump instructions if the displacement fits in one byte. + Enabled for Native Image O1-O3 per default; disabled elsewhere. Use `-Djdk.graal.OptimizeLongJumps=true` to enable. ## GraalVM for JDK 22 (Internal Version 24.0.0) * (GR-49876): Added `-Dgraal.PrintIntrinsics=true` to log the intrinsics used by Graal in the current runtime. diff --git a/compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/asm/amd64/test/OptimizeLongJumpsTest.java b/compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/asm/amd64/test/OptimizeLongJumpsTest.java new file mode 100644 index 000000000000..bef28e003ffc --- /dev/null +++ b/compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/asm/amd64/test/OptimizeLongJumpsTest.java @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package jdk.graal.compiler.asm.amd64.test; + +import java.util.Arrays; + +import org.junit.Assume; +import org.junit.Before; +import org.junit.Test; + +import jdk.graal.compiler.asm.amd64.AMD64Assembler; +import jdk.graal.compiler.core.common.GraalOptions; +import jdk.graal.compiler.core.test.GraalCompilerTest; +import jdk.graal.compiler.hotspot.CompilerConfigurationFactory; +import jdk.graal.compiler.nodes.StructuredGraph; +import jdk.graal.compiler.nodes.StructuredGraph.AllowAssumptions; +import jdk.graal.compiler.options.OptionValues; +import jdk.vm.ci.amd64.AMD64; +import jdk.vm.ci.code.InstalledCode; +import jdk.vm.ci.code.InvalidInstalledCodeException; + +/** + * Regression tests for checking that + * {@link jdk.graal.compiler.core.common.GraalOptions#OptimizeLongJumps} replaces {@code jmp/jcc} + * instructions with {@code jmpb/jccb}. 
+ */ +public class OptimizeLongJumpsTest extends GraalCompilerTest { + + @Before + public void checkAMD64() { + Assume.assumeTrue("skipping AMD64 specific test", getTarget().arch instanceof AMD64); + } + + public static int sideeffect = 0; + + public static int snippet01(int bound) { + for (int i = 0; i < bound; i++) { + sideeffect += i; + } + sideeffect = 0; + return sideeffect; + } + + public static int snippet02(boolean b, int x1, int x2) { + if (b) { + return x1; + } + return x2; + } + + @Test + public void test01() throws InvalidInstalledCodeException { + OptionValues options = new OptionValues(getInitialOptions(), AMD64Assembler.Options.UseBranchesWithin32ByteBoundary, true, CompilerConfigurationFactory.Options.CompilerConfiguration, + "economy"); + testOptimizeLongJumps("snippet01", options, 42); + } + + @Test + public void test02() throws InvalidInstalledCodeException { + OptionValues options = new OptionValues(getInitialOptions(), AMD64Assembler.Options.UseBranchesWithin32ByteBoundary, true, CompilerConfigurationFactory.Options.CompilerConfiguration, + "economy"); + testOptimizeLongJumps("snippet02", options, true, 1, 2); + } + + private void testOptimizeLongJumps(String method, OptionValues opts, Object... params) throws InvalidInstalledCodeException { + OptionValues optionsDefault = new OptionValues(opts, GraalOptions.OptimizeLongJumps, false); + StructuredGraph graphDefault = parseEager(method, AllowAssumptions.NO, optionsDefault); + InstalledCode codeDefault = null; + + OptionValues optionsOptimized = new OptionValues(opts, GraalOptions.OptimizeLongJumps, true); + StructuredGraph graphOptimized = parseEager(method, AllowAssumptions.NO, optionsOptimized); + InstalledCode codeOptimized = null; + + for (int i = 0; i < 3; i++) { + + /* + * Why using a loop: The optimization is considered successful, if there are fewer + * jmp/jcc instructions compared to the unoptimized code. 
To assert this condition, + * checkCode counts long jump / jcc opcodes in the raw code byte arrays. Thus, under + * rare circumstances, bytes from constants, displacements, etc can "look" like the + * opcodes we are searching for which can lead to false counts. If the success condition + * does not hold, we redo the code emits trying to rule out false positives and only + * fail if the success condition does not hold repeatedly. + */ + + codeDefault = getCode(graphDefault.method(), graphDefault, true, true, optionsDefault); + Object resultDefault = codeDefault.executeVarargs(params); + + codeOptimized = getCode(graphOptimized.method(), graphOptimized, true, true, optionsOptimized); + Object resultOptimized = codeOptimized.executeVarargs(params); + + assertTrue(String.format("Optimized code should behave identically! Result (default): %d | Result (optimized): %d", resultDefault, resultOptimized), resultDefault.equals(resultOptimized)); + if (checkCode(codeDefault, codeOptimized)) { + return; + } + } + fail(String.format("Optimized code should have fewer long jumps!\n\tDefault code: %s\n\tOptimized code: %s", byteArrayToHexArray(codeDefault.getCode()), + byteArrayToHexArray(codeOptimized.getCode()))); + } + + private static boolean checkCode(InstalledCode codeDefault, InstalledCode codeOptimized) { + byte[] bytesDefault = codeDefault.getCode(); + byte[] bytesOptimized = codeOptimized.getCode(); + + if (bytesDefault.length > bytesOptimized.length) { + // code size reduction, so optimization must have worked + return true; + } + + return countLongJumpsHeuristically(bytesDefault) > countLongJumpsHeuristically(bytesOptimized); + } + + private static int countLongJumpsHeuristically(byte[] code) { + /* + * Counts opcodes for jmp and jcc in a raw code byte array. If a non-opcode byte looks like + * a jmp / jcc opcode, this would lead to false counts. 
 */ + int longJumps = 0; + for (int i = 0; i < code.length - 1; i++) { + if (isLongJmp(code[i]) || isLongJcc(code[i], code[i + 1])) { + longJumps++; + } + } + return longJumps; + } + + public static boolean isLongJmp(byte b) { + return (b & 0xFF) == 0xE9; + } + + public static boolean isLongJcc(byte b0, byte b1) { + return b0 == 0x0F && (b1 & 0xF0) == 0x80; + } + + private static String byteArrayToHexArray(byte[] code) { + String[] hex = new String[code.length]; + + for (int i = 0; i < code.length; i++) { + hex[i] = byteToHex(code[i]); + } + + return Arrays.toString(hex); + } + + private static String byteToHex(byte num) { + char[] hexDigits = new char[2]; + hexDigits[0] = Character.forDigit((num >> 4) & 0xF, 16); + hexDigits[1] = Character.forDigit((num & 0xF), 16); + return new String(hexDigits); + } +} diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/asm/amd64/AMD64Assembler.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/asm/amd64/AMD64Assembler.java index 3f7533443987..f12602af3818 --- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/asm/amd64/AMD64Assembler.java +++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/asm/amd64/AMD64Assembler.java @@ -77,10 +77,15 @@ import static jdk.vm.ci.amd64.AMD64.CPUFeature.GFNI; import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD; +import java.util.ArrayList; import java.util.EnumSet; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import jdk.graal.compiler.asm.BranchTargetOutOfBoundsException; import jdk.graal.compiler.asm.Label; import jdk.graal.compiler.asm.amd64.AVXKind.AVXSize; +import jdk.graal.compiler.core.common.GraalOptions; import jdk.graal.compiler.core.common.Stride; import jdk.graal.compiler.core.common.calc.Condition; import jdk.graal.compiler.debug.Assertions; @@ -89,6 +94,9 @@ import jdk.graal.compiler.options.OptionKey; import jdk.graal.compiler.options.OptionType; import jdk.graal.compiler.options.OptionValues; + +import 
org.graalvm.collections.EconomicSet; + import jdk.vm.ci.amd64.AMD64; import jdk.vm.ci.amd64.AMD64.CPUFeature; import jdk.vm.ci.amd64.AMD64Kind; @@ -111,6 +119,7 @@ public static class Options { } private final boolean useBranchesWithin32ByteBoundary; + private boolean optimizeLongJumps; /** * Constructs an assembler for the AMD64 architecture. @@ -118,11 +127,13 @@ public static class Options { public AMD64Assembler(TargetDescription target) { super(target); useBranchesWithin32ByteBoundary = false; + optimizeLongJumps = GraalOptions.OptimizeLongJumps.getDefaultValue(); } public AMD64Assembler(TargetDescription target, OptionValues optionValues) { super(target); useBranchesWithin32ByteBoundary = Options.UseBranchesWithin32ByteBoundary.getValue(optionValues); + optimizeLongJumps = GraalOptions.OptimizeLongJumps.getValue(optionValues); } public AMD64Assembler(TargetDescription target, OptionValues optionValues, boolean hasIntelJccErratum) { @@ -132,6 +143,7 @@ public AMD64Assembler(TargetDescription target, OptionValues optionValues, boole } else { useBranchesWithin32ByteBoundary = hasIntelJccErratum; } + optimizeLongJumps = GraalOptions.OptimizeLongJumps.getValue(optionValues); } /** @@ -2834,8 +2846,8 @@ protected final int mitigateJCCErratum(int position, int bytesToEmit) { } public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) { - final int shortSize = 2; - final int longSize = 6; + final int shortSize = JumpType.JCCB.instrSize; + final int longSize = JumpType.JCC.instrSize; long disp = jumpTarget - position(); if (!forceDisp32 && isByte(disp - shortSize)) { @@ -2844,8 +2856,10 @@ public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) { disp = jumpTarget - position(); if (isByte(disp - shortSize)) { // 0111 tttn #8-bit disp + int pos = position(); emitByte(0x70 | cc.getValue()); emitByte((int) ((disp - shortSize) & 0xFF)); + trackJump(JumpType.JCCB, pos); return; } } @@ -2853,10 +2867,12 @@ public void jcc(ConditionFlag cc, int 
jumpTarget, boolean forceDisp32) { // 0000 1111 1000 tttn #32-bit disp assert forceDisp32 || isInt(disp - longSize) : "must be 32bit offset (call4)"; mitigateJCCErratum(longSize); + int pos = position(); disp = jumpTarget - position(); emitByte(0x0F); emitByte(0x80 | cc.getValue()); emitInt((int) (disp - longSize)); + trackJump(JumpType.JCC, pos); } /** @@ -2865,15 +2881,19 @@ public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) { */ public final void jcc(ConditionFlag cc) { annotatePatchingImmediate(2, 4); + int pos = position(); emitByte(0x0F); emitByte(0x80 | cc.getValue()); emitInt(0); + trackJump(JumpType.JCC, pos); } public final void jcc(ConditionFlag cc, Label l) { assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc"; if (l.isBound()) { jcc(cc, l.position(), false); + } else if (canUseShortJump(nextJumpIdx)) { + jccb(cc, l); } else { mitigateJCCErratum(6); // Note: could eliminate cond. jumps to this jump if condition @@ -2881,9 +2901,11 @@ public final void jcc(ConditionFlag cc, Label l) { // Note: use jccb() if label to be bound is very close to get // an 8-bit displacement l.addPatchAt(position(), this); + int pos = position(); emitByte(0x0F); emitByte(0x80 | cc.getValue()); emitInt(0); + trackJump(JumpType.JCC, pos); } } @@ -2892,8 +2914,9 @@ public final void jccb(ConditionFlag cc, Label l) { jcc(cc, l); return; } - final int shortSize = 2; + final int shortSize = JumpType.JCCB.instrSize; mitigateJCCErratum(shortSize); + int pos = position(); if (l.isBound()) { int entry = l.position(); assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp: " + (entry - (position() + shortSize)); @@ -2901,10 +2924,12 @@ public final void jccb(ConditionFlag cc, Label l) { // 0111 tttn #8-bit disp emitByte(0x70 | cc.getValue()); emitByte((int) ((disp - shortSize) & 0xFF)); + trackJump(JumpType.JCCB, pos); } else { l.addPatchAt(position(), this); emitByte(0x70 | cc.getValue()); emitByte(0); + 
trackJump(JumpType.JCCB, pos); } } @@ -2925,8 +2950,8 @@ public final void jcc(ConditionFlag cc, Label branchTarget, boolean isShortJmp) * @return the position where the jmp instruction starts. */ public final int jmp(int jumpTarget, boolean forceDisp32) { - final int shortSize = 2; - final int longSize = 5; + final int shortSize = JumpType.JMPB.instrSize; + final int longSize = JumpType.JMP.instrSize; // For long jmp, the jmp instruction will cross the jcc-erratum-mitigation-boundary when the // current position is between [0x1b, 0x1f]. For short jmp [0x1e, 0x1f], which is covered by // the long jmp triggering range. @@ -2938,6 +2963,8 @@ public final int jmp(int jumpTarget, boolean forceDisp32) { if (isByte(disp - shortSize)) { emitByte(0xEB); emitByte((int) ((disp - shortSize) & 0xFF)); + trackJump(JumpType.JMPB, pos); + return pos; } } @@ -2947,6 +2974,7 @@ public final int jmp(int jumpTarget, boolean forceDisp32) { long disp = jumpTarget - pos; emitByte(0xE9); emitInt((int) (disp - longSize)); + trackJump(JumpType.JMP, pos); return pos; } @@ -2959,15 +2987,19 @@ public void halt() { public final void jmp(Label l) { if (l.isBound()) { jmp(l.position(), false); + } else if (canUseShortJump(nextJumpIdx)) { + jmpb(l); } else { // By default, forward jumps are always 32-bit displacements, since // we can't yet know where the label will be bound. If you're sure that // the forward jump will not run beyond 256 bytes, use jmpb to // force an 8-bit displacement. 
mitigateJCCErratum(5); - l.addPatchAt(position(), this); + int pos = position(); + l.addPatchAt(pos, this); emitByte(0xE9); emitInt(0); + trackJump(JumpType.JMP, pos); } } @@ -3052,18 +3084,22 @@ public final void jmpb(Label l) { jmp(l); return; } - final int shortSize = 2; + final int shortSize = JumpType.JMPB.instrSize; mitigateJCCErratum(shortSize); if (l.isBound()) { // Displacement is relative to byte just after jmpb instruction - int displacement = l.position() - position() - shortSize; + int pos = position(); + int displacement = l.position() - pos - shortSize; GraalError.guarantee(isByte(displacement), "Displacement too large to be encoded as a byte: %d", displacement); emitByte(0xEB); emitByte(displacement & 0xFF); + trackJump(JumpType.JMPB, pos); } else { - l.addPatchAt(position(), this); + int pos = position(); + l.addPatchAt(pos, this); emitByte(0xEB); emitByte(0); + trackJump(JumpType.JMPB, pos); } } @@ -4931,7 +4967,9 @@ protected final void patchJumpTarget(int branch, int branchTarget) { * Since a wrongly patched short branch can potentially lead to working but really bad * behaving code we should always fail with an exception instead of having an assert. */ - GraalError.guarantee(isByte(imm8), "Displacement too large to be encoded as a byte: %d", imm8); + if (!isByte(imm8)) { + throw new BranchTargetOutOfBoundsException(true, "Displacement too large to be encoded as a byte: %d", imm8); + } emitByte(imm8, branch + 1); } else { @@ -5651,4 +5689,204 @@ public final void vfpclassss(Register dst, Register mask, Register src, int imm8 emitModRM(dst, src); emitByte(imm8); } + + /** + * Wraps information for different jump instructions, such as the instruction and displacement + * size and the position of the displacement within the instruction. 
+ */ + public enum JumpType { + JMP(5, 1, 4), + JMPB(2, 1, 1), + JCC(6, 2, 4), + JCCB(2, 1, 1); + + JumpType(int instrSize, int dispPos, int dispSize) { + assert instrSize == dispPos + dispSize : "Invalid JumpInfo: instrSize=" + instrSize + ", dispPos=" + dispPos + ", dispSize=" + dispSize; + this.instrSize = instrSize; + this.dispPos = dispPos; + this.dispSize = dispSize; + } + + /** + * Size of the instruction in bytes. + */ + public final int instrSize; + /** + * Size of the jump displacement in bytes. + */ + public final int dispSize; + /** + * Position (in bytes) of the jump displacement within the instruction. + */ + public final int dispPos; + } + + /** + * Collects information about emitted jumps. Used for optimizing long jumps in a second code + * emit pass. + */ + public static class JumpInfo { + /** + * Accounts for unknown alignments when deciding if a forward jump should be emitted with a + * single byte displacement (called "short" jump). Only forward jumps with displacements < + * (127 - {@link #ALIGNMENT_COMPENSATION_HEURISTIC}) are emitted as short jumps. + */ + private static final int ALIGNMENT_COMPENSATION_HEURISTIC = 32; + + /** + * The index of this jump within the emitted code. Corresponds to the emit order, e.g., + * {@code idx = 5} denotes the 6th emitted jump. + */ + public final int jumpIdx; + + /** + * The position (bytes from the beginning of the method) of the instruction. + */ + public final int instrPos; + /** + * The type of the jump instruction. + */ + public final JumpType type; + /** + * The position (bytes from the beginning of the method) of the displacement. + */ + public int displacementPosition; + + private final AMD64Assembler asm; + + JumpInfo(int jumpIdx, JumpType type, int pos, AMD64Assembler asm) { + this.jumpIdx = jumpIdx; + this.type = type; + this.instrPos = pos; + this.displacementPosition = pos + type.dispPos; + this.asm = asm; + } + + /** + * Read the jump displacement from the code buffer. 
If the corresponding jump label has not + * been bound yet, the displacement is still uninitialized (=0). + */ + public int getDisplacement() { + switch (type.dispSize) { + case Byte.BYTES: + return asm.getByte(instrPos + type.dispPos); + case Integer.BYTES: + return asm.getInt(instrPos + type.dispPos); + default: + throw new RuntimeException("Unhandled jump displacement size: " + type.dispSize); + } + } + + /** + * Returns true if this jump fulfills all following conditions: + * + * + * The jump distance of replaceable jumps is reduced by + * {@link JumpInfo#ALIGNMENT_COMPENSATION_HEURISTIC} to heuristically compensate alignments. + */ + public boolean canBeOptimized() { + // check if suitable op: + if (type != JumpType.JCC && type != JumpType.JMP) { + return false; + } + + int displacement = getDisplacement(); + // backward jumps are already emitted in optimal size + if (displacement < 0) { + return false; + } + // Check if displacement (heuristically compensating alignments) fits in single byte. + return isByte(getDisplacement() + ALIGNMENT_COMPENSATION_HEURISTIC); + } + + public boolean isLongJmp() { + return asm.getByte(instrPos) == 0xE9; + } + + public boolean isLongJcc() { + return asm.getByte(instrPos) == 0x0F && (asm.getByte(instrPos + 1) & 0xF0) == 0x80; + } + } + + /** + * Emit order index of next jump (jmp | jcc) to be emitted. Used for finding the same jumps + * across different code emits. This requires the order of emitted code from the same LIR to be + * deterministic. + */ + private int nextJumpIdx = 0; + + /** + * Checks if the jump at the given index can be replaced by a equivalent instruction with + * smaller displacement size. This replacement can only be done if + * {@link AMD64BaseAssembler#force4ByteNonZeroDisplacements} allows emitting short jumps and if + * a previous code emit has found that this jump will have a sufficiently small displacement. 
+ */ + private boolean canUseShortJump(int jumpIdx) { + return !force4ByteNonZeroDisplacements && optimizeLongJumps && longToShortJumps != null && longToShortJumps.contains(jumpIdx); + } + + /** + * Information about emitted jumps, which can be processed after patching of jump targets. + */ + private List jumpInfo = new ArrayList<>(); + + /** + * Stores the emit index of jumps which can be replaced by single byte displacement versions. + * Subsequent code emits can use this information to emit short jumps for these indices. + */ + private EconomicSet longToShortJumps = EconomicSet.create(); + + private void trackJump(JumpType type, int pos) { + jumpInfo.add(new JumpInfo(nextJumpIdx++, type, pos, this)); + } + + /** + * The maximum number of acceptable bailouts when optimizing long jumps. Only checked if + * assertions are enabled. + */ + private static final int MAX_OPTIMIZE_LONG_JUMPS_BAILOUTS = 20; + + /** + * Accumulated number of bailouts when optimizing long jumps. Such bailouts are ok in rare + * cases. Only checked if assertions are enabled. + */ + private static AtomicInteger optimizeLongJumpsBailouts = new AtomicInteger(0); + + /** + * Disables optimizing long jumps for this compilation. If assertions are enabled, checks that + * the accumulated number of bailouts when optimizing long jumps does not exceed + * {@link AMD64Assembler#MAX_OPTIMIZE_LONG_JUMPS_BAILOUTS}. This would indicate a regression in + * the algorithm / the heuristics. + */ + public void disableOptimizeLongJumpsAfterException() { + assert optimizeLongJumpsBailouts.incrementAndGet() < MAX_OPTIMIZE_LONG_JUMPS_BAILOUTS : "Replacing 4byte-displacement jumps with 1byte-displacement jumps has resulted in too many BranchTargetOutOfBoundsExceptions. 
" + + "Please check the algorithm or disable the optimization by setting OptimizeLongJumps=false!"; + optimizeLongJumps = false; + } + + @Override + public void reset() { + if (optimizeLongJumps) { + longToShortJumps.clear(); + for (JumpInfo j : jumpInfo) { + if (j.canBeOptimized()) { + /* + * Mark the indices of emitted jumps (jmp | jcc) which could have been replaced + * by short jumps (8bit displacement). The order in which jumps are emitted from + * the same LIR is required to be deterministic! + */ + longToShortJumps.add(j.jumpIdx); + } + } + } + super.reset(); + nextJumpIdx = 0; + jumpInfo = new ArrayList<>(); + } + } diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/code/CompilationResult.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/code/CompilationResult.java index c5bdb565a13a..d27b0bee5520 100644 --- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/code/CompilationResult.java +++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/code/CompilationResult.java @@ -820,15 +820,24 @@ public boolean hasUnsafeAccess() { */ public void resetForEmittingCode() { checkOpen(); + // reset collections infopoints.clear(); sourceMapping.clear(); dataPatches.clear(); exceptionHandlers.clear(); marks.clear(); dataSection.clear(); + invalidCallDeoptimizationStates.clear(); if (annotations != null) { annotations.clear(); } + // reset fields + targetCode = null; + targetCodeSize = 0; + assumptions = null; + speculationLog = null; + methods = null; + bytecodeSize = 0; } public void clearInfopoints() { diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/core/common/GraalOptions.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/core/common/GraalOptions.java index 3a116ed90586..dbd9d495a759 100644 --- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/core/common/GraalOptions.java +++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/core/common/GraalOptions.java @@ -295,4 +295,7 @@ public 
final class GraalOptions { @Option(help = "Perform checks that guards and deopts aren't introduced in graphs that should handle exceptions explicitly", type = OptionType.Debug) public static final OptionKey StrictDeoptInsertionChecks = new OptionKey<>(false); + + @Option(help = "AMD64 only: Replace forward jumps (jmp, jcc) with equivalent but smaller instructions if the actual jump displacement fits in one byte.", type = OptionType.Expert) + public static final OptionKey OptimizeLongJumps = new OptionKey<>(false); } diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/hotspot/amd64/AMD64HotSpotBackend.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/hotspot/amd64/AMD64HotSpotBackend.java index e61d201865a4..595daa69f118 100644 --- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/hotspot/amd64/AMD64HotSpotBackend.java +++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/hotspot/amd64/AMD64HotSpotBackend.java @@ -31,6 +31,7 @@ import static jdk.vm.ci.code.ValueUtil.asRegister; import static jdk.graal.compiler.core.common.GraalOptions.ZapStackOnMethodEntry; +import jdk.graal.compiler.asm.BranchTargetOutOfBoundsException; import jdk.graal.compiler.asm.Label; import jdk.graal.compiler.asm.amd64.AMD64Address; import jdk.graal.compiler.asm.amd64.AMD64Assembler; @@ -39,6 +40,7 @@ import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler; import jdk.graal.compiler.code.CompilationResult; import jdk.graal.compiler.core.amd64.AMD64NodeMatchRules; +import jdk.graal.compiler.core.common.GraalOptions; import jdk.graal.compiler.core.common.NumUtil; import jdk.graal.compiler.core.common.alloc.RegisterAllocationConfig; import jdk.graal.compiler.core.common.spi.ForeignCallLinkage; @@ -291,6 +293,13 @@ public CompilationResultBuilder newCompilationResultBuilder(LIRGenerationResult @Override public void emitCode(CompilationResultBuilder crb, ResolvedJavaMethod installedCodeOwner, EntryPointDecorator entryPointDecorator) { + emitCodeHelper(crb, 
installedCodeOwner, entryPointDecorator); + if (GraalOptions.OptimizeLongJumps.getValue(crb.getOptions())) { + optimizeLongJumps(crb, installedCodeOwner, entryPointDecorator); + } + } + + private void emitCodeHelper(CompilationResultBuilder crb, ResolvedJavaMethod installedCodeOwner, EntryPointDecorator entryPointDecorator) { AMD64MacroAssembler asm = (AMD64MacroAssembler) crb.asm; FrameMap frameMap = crb.frameMap; RegisterConfig regConfig = frameMap.getRegisterConfig(); @@ -436,4 +445,27 @@ public RegisterAllocationConfig newRegisterAllocationConfig(RegisterConfig regis RegisterConfig registerConfigNonNull = registerConfig == null ? getCodeCache().getRegisterConfig() : registerConfig; return new AMD64HotSpotRegisterAllocationConfig(registerConfigNonNull, allocationRestrictedTo, config.preserveFramePointer); } + + /** + * Performs a code emit from LIR and replaces jumps with 4byte displacement by equivalent + * instructions with single byte displacement, where possible. If any of these optimizations + * unexpectedly results in a {@link BranchTargetOutOfBoundsException}, code without any + * optimized jumps will be emitted. + */ + private void optimizeLongJumps(CompilationResultBuilder crb, ResolvedJavaMethod installedCodeOwner, EntryPointDecorator entryPointDecorator) { + // triggers a reset of the assembler during which replaceable jumps are identified + crb.resetForEmittingCode(); + try { + emitCodeHelper(crb, installedCodeOwner, entryPointDecorator); + } catch (BranchTargetOutOfBoundsException e) { + /* + * Alignments have invalidated the assumptions regarding short jumps. Trigger fail-safe + * mode and emit unoptimized code. 
+ */ + AMD64MacroAssembler masm = (AMD64MacroAssembler) crb.asm; + masm.disableOptimizeLongJumpsAfterException(); + crb.resetForEmittingCode(); + emitCodeHelper(crb, installedCodeOwner, entryPointDecorator); + } + } } diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/asm/CompilationResultBuilder.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/asm/CompilationResultBuilder.java index b2dea7ba8ddf..545c5c13d5f2 100644 --- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/asm/CompilationResultBuilder.java +++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/asm/CompilationResultBuilder.java @@ -638,6 +638,14 @@ public void resetForEmittingCode() { currentBlockIndex = 0; lastImplicitExceptionOffset = Integer.MIN_VALUE; lir.resetLabels(); + needsMHDeoptHandler = false; + conservativeLabelOffsets = false; + if (labelBindLirPositions != null) { + labelBindLirPositions.clear(); + } + if (lirPositions != null) { + lirPositions.clear(); + } } public OptionValues getOptions() { diff --git a/substratevm/src/com.oracle.svm.core.graal.amd64/src/com/oracle/svm/core/graal/amd64/SubstrateAMD64Backend.java b/substratevm/src/com.oracle.svm.core.graal.amd64/src/com/oracle/svm/core/graal/amd64/SubstrateAMD64Backend.java index 7264cf7d2637..b2096467096e 100644 --- a/substratevm/src/com.oracle.svm.core.graal.amd64/src/com/oracle/svm/core/graal/amd64/SubstrateAMD64Backend.java +++ b/substratevm/src/com.oracle.svm.core.graal.amd64/src/com/oracle/svm/core/graal/amd64/SubstrateAMD64Backend.java @@ -91,6 +91,7 @@ import com.oracle.svm.core.thread.VMThreads.StatusSupport; import com.oracle.svm.core.util.VMError; +import jdk.graal.compiler.asm.BranchTargetOutOfBoundsException; import jdk.graal.compiler.asm.Label; import jdk.graal.compiler.asm.amd64.AMD64Address; import jdk.graal.compiler.asm.amd64.AMD64Assembler; @@ -106,6 +107,7 @@ import jdk.graal.compiler.core.amd64.AMD64NodeMatchRules; import 
jdk.graal.compiler.core.common.CompilationIdentifier; import jdk.graal.compiler.core.common.CompressEncoding; +import jdk.graal.compiler.core.common.GraalOptions; import jdk.graal.compiler.core.common.LIRKind; import jdk.graal.compiler.core.common.Stride; import jdk.graal.compiler.core.common.alloc.RegisterAllocationConfig; @@ -1528,6 +1530,9 @@ public CompiledCode createCompiledCode(ResolvedJavaMethod method, CompilationReq @Override public void emitCode(CompilationResultBuilder crb, ResolvedJavaMethod installedCodeOwner, EntryPointDecorator entryPointDecorator) { crb.emitLIR(); + if (GraalOptions.OptimizeLongJumps.getValue(crb.getOptions())) { + optimizeLongJumps(crb); + } } private AMD64Assembler createAssemblerNoOptions() { @@ -1535,6 +1540,10 @@ private AMD64Assembler createAssemblerNoOptions() { return createAssembler(o); } + protected void resetForEmittingCode(CompilationResultBuilder crb) { + crb.resetForEmittingCode(); + } + @Override public CompilationResult createJNITrampolineMethod(ResolvedJavaMethod method, CompilationIdentifier identifier, RegisterValue threadArg, int threadIsolateOffset, RegisterValue methodIdArg, int methodObjEntryPointOffset) { @@ -1576,4 +1585,28 @@ public CompilationResult createJNITrampolineMethod(ResolvedJavaMethod method, Co result.setTotalFrameSize(getTarget().wordSize); // not really, but 0 not allowed return result; } + + /** + * Performs a code emit from LIR and replaces jumps with 4byte displacement by equivalent + * instructions with single byte displacement, where possible. If any of these optimizations + * unexpectedly results in a {@link BranchTargetOutOfBoundsException}, code without any + * optimized jumps will be emitted. + */ + @SuppressWarnings("static-method") + private void optimizeLongJumps(CompilationResultBuilder crb) { + // Triggers a reset of the assembler during which replaceable jumps are identified. 
+ resetForEmittingCode(crb); + try { + crb.emitLIR(); + } catch (BranchTargetOutOfBoundsException e) { + /* + * Alignments have invalidated the assumptions regarding short jumps. Trigger fail-safe + * mode and emit unoptimized code. + */ + AMD64MacroAssembler masm = (AMD64MacroAssembler) crb.asm; + masm.disableOptimizeLongJumpsAfterException(); + crb.resetForEmittingCode(); + crb.emitLIR(); + } + } } diff --git a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/SubstrateOptions.java b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/SubstrateOptions.java index 584df260cb63..1d3d2e08baeb 100644 --- a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/SubstrateOptions.java +++ b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/SubstrateOptions.java @@ -262,6 +262,7 @@ protected void onValueUpdate(EconomicMap, Object> values, String ol SubstrateOptions.IncludeNodeSourcePositions.update(values, newLevel == OptimizationLevel.O0); SubstrateOptions.SourceLevelDebug.update(values, newLevel == OptimizationLevel.O0); SubstrateOptions.AOTTrivialInline.update(values, newLevel != OptimizationLevel.O0); + GraalOptions.OptimizeLongJumps.update(values, !newLevel.isOneOf(OptimizationLevel.O0, OptimizationLevel.BUILD_TIME)); if (optimizeValueUpdateHandler != null) { optimizeValueUpdateHandler.onValueUpdate(values, newLevel); }