From f7db7b1eae84ecb57dfc673234dfd8ea67c080be Mon Sep 17 00:00:00 2001 From: Yudi Zheng Date: Mon, 29 Aug 2022 20:20:17 +0200 Subject: [PATCH 1/4] Port GHASH stub. --- compiler/mx.compiler/suite.py | 5 +- .../asm/aarch64/AArch64ASIMDAssembler.java | 63 ++ .../compiler/asm/amd64/AMD64Assembler.java | 118 ++- .../asm/amd64/AMD64MacroAssembler.java | 4 + .../core/aarch64/AArch64LIRGenerator.java | 6 + .../core/amd64/AMD64LIRGenerator.java | 6 + .../AArch64HotSpotForeignCallsProvider.java | 15 + .../AMD64HotSpotForeignCallsProvider.java | 14 + .../hotspot/test/HotSpotGHASHTest.java | 203 +++++ .../meta/HotSpotGraphBuilderPlugins.java | 20 - .../meta/HotSpotHostForeignCallsProvider.java | 11 - .../meta/UnimplementedGraalIntrinsics.java | 30 +- .../hotspot/stubs/IntrinsicStubs.java | 2 + .../lir/aarch64/AArch64AESDecryptOp.java | 9 +- .../lir/aarch64/AArch64AESEncryptOp.java | 108 ++- .../aarch64/AArch64GHASHProcessBlocksOp.java | 773 ++++++++++++++++++ .../lir/amd64/AMD64ArrayCompareToOp.java | 12 +- .../lir/amd64/AMD64EncodeArrayOp.java | 5 +- .../lir/amd64/AMD64GHASHProcessBlocksOp.java | 575 +++++++++++++ .../lir/amd64/AMD64HasNegativesOp.java | 5 +- .../compiler/lir/gen/LIRGeneratorTool.java | 5 + .../aarch64/AArch64GraphBuilderPlugins.java | 19 +- .../amd64/AMD64GraphBuilderPlugins.java | 25 +- .../StandardGraphBuilderPlugins.java | 20 + .../nodes/CryptoForeignCalls.java | 16 +- .../nodes/GHASHProcessBlocksNode.java | 105 +++ .../com/oracle/svm/core/cpufeature/Stubs.java | 10 + .../stubs/AARCH64StubForeignCallsFeature.java | 4 +- .../stubs/AMD64StubForeignCallsFeature.java | 6 +- .../svm/graal/stubs/SVMIntrinsicStubs.java | 2 + 30 files changed, 2076 insertions(+), 120 deletions(-) create mode 100644 compiler/src/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/HotSpotGHASHTest.java create mode 100644 compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64GHASHProcessBlocksOp.java create mode 100644 
compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64GHASHProcessBlocksOp.java create mode 100644 compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/GHASHProcessBlocksNode.java diff --git a/compiler/mx.compiler/suite.py b/compiler/mx.compiler/suite.py index 3569b46ee659..4afacff8be1b 100644 --- a/compiler/mx.compiler/suite.py +++ b/compiler/mx.compiler/suite.py @@ -567,7 +567,8 @@ "dependencies" : [ "JVMCI_HOTSPOT", "org.graalvm.compiler.api.runtime", - "org.graalvm.compiler.replacements", + "org.graalvm.compiler.replacements.amd64", + "org.graalvm.compiler.replacements.aarch64", "org.graalvm.compiler.printer", "org.graalvm.compiler.runtime", ], @@ -635,7 +636,6 @@ "dependencies" : [ "org.graalvm.compiler.core.aarch64", "org.graalvm.compiler.hotspot", - "org.graalvm.compiler.replacements.aarch64", ], "requires" : [ "jdk.unsupported" # sun.misc.Unsafe @@ -653,7 +653,6 @@ "sourceDirs" : ["src"], "dependencies" : [ "org.graalvm.compiler.core.amd64", - "org.graalvm.compiler.replacements.amd64", "org.graalvm.compiler.hotspot", ], "requiresConcealed" : { diff --git a/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDAssembler.java b/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDAssembler.java index ec4527099aef..1166722f1d1f 100644 --- a/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDAssembler.java +++ b/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDAssembler.java @@ -573,6 +573,7 @@ public enum ASIMDInstruction { CMLT_ZERO(0b01010 << 12), ABS(0b01011 << 12), XTN(0b10010 << 12), + PMULL(0b1110 << 12), /* size 0x */ FCVTN(0b10110 << 12), FCVTL(0b10111 << 12), @@ -590,6 +591,8 @@ public enum ASIMDInstruction { NEG(UBit | 0b01011 << 12), /* UBit 1, size 00 */ NOT(UBit | 0b00101 << 12), + /* UBit 1, size 01 */ + 
RBIT(UBit | 0b00101 << 12), /* UBit 1, size 1x */ FCMGE_ZERO(UBit | 0b01100 << 12), FCMLE_ZERO(UBit | 0b01101 << 12), @@ -845,6 +848,15 @@ private void copyEncoding(ASIMDInstruction instr, boolean setQBit, ElementSize e emitInt(instr.encoding | baseEncoding | qBit(setQBit) | imm5Encoding | rd(dst) | rs1(src)); } + private void copyEncoding(boolean setQBit, ElementSize eSize, Register dst, int indexDst, Register src, int indexSrc) { + assert indexDst >= 0 && indexDst < ASIMDSize.FullReg.bytes() / eSize.bytes(); + assert indexSrc >= 0 && indexSrc < ASIMDSize.FullReg.bytes() / eSize.bytes(); + int baseEncoding = 0b0_0_1_01110000_00000_0_0000_1_00000_00000; + int imm5Encoding = (indexDst * 2 * eSize.bytes() | eSize.bytes()) << 16; + int imm4Encoding = (indexSrc * eSize.bytes()) << 11; + emitInt(imm4Encoding | baseEncoding | qBit(setQBit) | imm5Encoding | rd(dst) | rs1(src)); + } + private void twoRegMiscEncoding(ASIMDInstruction instr, ASIMDSize size, int eSizeEncoding, Register dst, Register src) { twoRegMiscEncoding(instr, size == ASIMDSize.FullReg, eSizeEncoding, dst, src); } @@ -2063,6 +2075,22 @@ public void fsubVVV(ASIMDSize size, ElementSize eSize, Register dst, Register sr threeSameEncoding(ASIMDInstruction.FSUB, size, elemSize1X(eSize), dst, src1, src2); } + /** + * C7.2.175 Insert vector element from another vector element.
+ * + * This instruction copies the vector element of the source register to the specified vector + * element of the destination register. + * + * @param eSize size of value to duplicate. + * @param dst SIMD register. + * @param indexDst offset of value to store. + * @param src SIMD register. + * @param indexSrc offset of value to duplicate. + */ + public void insVV(ElementSize eSize, Register dst, int indexDst, Register src, int indexSrc) { + copyEncoding(true, eSize, dst, indexDst, src, indexSrc); + } + /** * C7.2.176 Insert vector element from general-purpose register.
* @@ -2339,6 +2367,41 @@ public void orrVVV(ASIMDSize size, Register dst, Register src1, Register src2) { threeSameEncoding(ASIMDInstruction.ORR, size, elemSize10, dst, src1, src2); } + /** + * C7.2.215 Polynomial Multiply Long.
+ * + * This instruction multiplies corresponding elements in the lower or upper half of the vectors. + * + * @param size source register size. + * @param elementSize source element size. Must be ElementSize.Byte or ElementSize.DoubleWord. + * @param dst SIMD register. + * @param src1 SIMD register. + * @param src2 SIMD register. + */ + public void pmullVVV(ASIMDSize size, ElementSize elementSize, Register dst, Register src1, Register src2) { + assert dst.getRegisterCategory().equals(SIMD); + assert src1.getRegisterCategory().equals(SIMD); + assert src2.getRegisterCategory().equals(SIMD); + assert elementSize == ElementSize.Byte || elementSize == ElementSize.DoubleWord; + + threeDifferentEncoding(ASIMDInstruction.PMULL, size == ASIMDSize.FullReg, elemSizeXX(elementSize), dst, src1, src2); + } + + /** + * C7.2.218 Reverse Bit order.
+ * This instruction reverses the bits in each byte. + * + * @param size register size. + * @param dst SIMD register. + * @param src SIMD register. + */ + public void rbitVV(ASIMDSize size, Register dst, Register src) { + assert dst.getRegisterCategory().equals(SIMD); + assert src.getRegisterCategory().equals(SIMD); + + twoRegMiscEncoding(ASIMDInstruction.RBIT, size, elemSize01, dst, src); + } + /** * C7.2.219 Reverse elements in 16-bit halfwords.
* This instruction reverses the order of 8-bit elements in each halfword. diff --git a/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64Assembler.java b/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64Assembler.java index 0aa015ff8225..237c93bbdf1c 100644 --- a/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64Assembler.java +++ b/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64Assembler.java @@ -84,8 +84,8 @@ import java.util.EnumSet; import org.graalvm.compiler.asm.Label; -import org.graalvm.compiler.core.common.Stride; import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize; +import org.graalvm.compiler.core.common.Stride; import org.graalvm.compiler.core.common.calc.Condition; import org.graalvm.compiler.debug.GraalError; import org.graalvm.compiler.options.Option; @@ -1073,6 +1073,7 @@ private enum VEXOpAssertion { MASK_NULL_XMM_AVX512BW_VL(CPUFeature.AVX512VL, CPUFeature.AVX512VL, null, EVEXFeatureAssertion.AVX512F_BW_VL, MASK, null, XMM, null), MASK_NULL_XMM_AVX512DQ_VL(CPUFeature.AVX512VL, CPUFeature.AVX512VL, null, EVEXFeatureAssertion.AVX512F_DQ_VL, MASK, null, XMM, null), MASK_XMM_XMM_AVX512F_VL(CPUFeature.AVX512VL, CPUFeature.AVX512VL, null, EVEXFeatureAssertion.AVX512F_VL, MASK, XMM, XMM, null), + AVX1_128ONLY_CLMUL(CPUFeature.AVX, null, CPUFeature.CLMUL, null, XMM, XMM, XMM, XMM), AVX1_128ONLY_AES(CPUFeature.AVX, null, CPUFeature.AES, null, XMM, XMM, XMM, XMM); private final CPUFeature l128feature; @@ -1989,6 +1990,29 @@ public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, i } } + public static final class VexShiftImmOp extends VexOp implements VexRRIOp { + // @formatter:off + public static final VexShiftImmOp VPSLLDQ = new VexShiftImmOp("VPSLLDQ", P_66, M_0F, WIG, 0x73, 7, VEXOpAssertion.AVX1_AVX2_AVX512BW_VL, EVEXTuple.FVM, WIG); + public static final VexShiftImmOp VPSRLDQ = new 
VexShiftImmOp("VPSRLDQ", P_66, M_0F, WIG, 0x73, 3, VEXOpAssertion.AVX1_AVX2_AVX512BW_VL, EVEXTuple.FVM, WIG); + // @formatter:on + + private final int r; + + private VexShiftImmOp(String opcode, int pp, int mmmmm, int w, int op, int r, VEXOpAssertion assertion, EVEXTuple evexTuple, int wEvex) { + super(opcode, pp, mmmmm, w, op, assertion, evexTuple, wEvex); + this.r = r; + } + + @Override + public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) { + GraalError.guarantee(assertion.check(asm.getFeatures(), size, null, dst, src), "emitting invalid instruction"); + asm.vexPrefix(null, dst, src, size, pp, mmmmm, w, wEvex, false, assertion.l128feature, assertion.l256feature); + asm.emitByte(op); + asm.emitModRM(r, src); + asm.emitByte(imm8); + } + } + /** * Masked (i.e., conditional) SIMD loads and stores. */ @@ -2137,6 +2161,8 @@ public static final class VexRVMIOp extends VexOp { // AVX2 128-bit permutation public static final VexRVMIOp VPERM2I128 = new VexRVMIOp("VPERM2I128", P_66, M_0F3A, W0, 0x46, VEXOpAssertion.AVX2_256ONLY); + // Carry-Less Multiplication Quadword + public static final VexRVMIOp VPCLMULQDQ = new VexRVMIOp("VPCLMULQDQ", P_66, M_0F3A, WIG, 0x44, VEXOpAssertion.AVX1_128ONLY_CLMUL); // Packed Align Right public static final VexRVMIOp VPALIGNR = new VexRVMIOp("VPALIGNR", P_66, M_0F3A, WIG, 0x0F, VEXOpAssertion.AVX1_AVX2_AVX512BW_VL, EVEXTuple.FVM, WIG); @@ -3839,6 +3865,16 @@ public final void pslld(Register dst, int imm8) { emitByte(imm8 & 0xFF); } + public final void pslldq(Register dst, int imm8) { + assert isUByte(imm8) : "invalid value"; + assert inRC(XMM, dst); + // XMM7 is for /7 encoding: 66 0F 73 /7 ib + simdPrefix(AMD64.xmm7, dst, dst, PD, P_0F, false); + emitByte(0x73); + emitModRM(7, dst); + emitByte(imm8 & 0xFF); + } + public final void psllq(Register dst, Register shift) { assert inRC(XMM, dst) && inRC(XMM, shift); simdPrefix(dst, dst, shift, PD, P_0F, false); @@ -3969,6 +4005,39 @@ public final void 
punpcklbw(Register dst, Register src) { emitModRM(dst, src); } + public final void pclmulqdq(Register dst, Register src, int imm8) { + assert supports(CPUFeature.CLMUL); + assert inRC(XMM, dst) && inRC(XMM, src); + simdPrefix(dst, dst, src, PD, P_0F3A, false); + emitByte(0x44); + emitModRM(dst, src); + emitByte(imm8); + } + + public final void vpshufb(Register dst, Register src1, Register src2, AVXSize size) { + VexRVMOp.VPSHUFB.emit(this, size, dst, src1, src2); + } + + public final void vpclmulqdq(Register dst, Register nds, Register src, int imm8) { + VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, imm8); + } + + public final void vpclmullqlqdq(Register dst, Register nds, Register src) { + VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, 0x00); + } + + public final void vpclmulhqlqdq(Register dst, Register nds, Register src) { + VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, 0x01); + } + + public final void vpclmullqhqdq(Register dst, Register nds, Register src) { + VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, 0x10); + } + + public final void vpclmulhqhqdq(Register dst, Register nds, Register src) { + VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, 0x11); + } + public final void rcpps(Register dst, Register src) { assert inRC(XMM, dst) && inRC(XMM, src); simdPrefix(dst, Register.None, src, PS, P_0F, false); @@ -4106,6 +4175,10 @@ public final void unpcklpd(Register dst, Register src) { emitModRM(dst, src); } + public final void xorb(Register dst, AMD64Address src) { + XOR.rmOp.emit(this, BYTE, dst, src); + } + public final void xorl(Register dst, Register src) { XOR.rmOp.emit(this, DWORD, dst, src); } @@ -4666,6 +4739,17 @@ public final void call() { emitInt(0); } + public final void call(Label l) { + if (l.isBound()) { + emitByte(0xE8); + emitInt(l.position()); + } else { + l.addPatchAt(position(), this); + emitByte(0xE8); + emitInt(0); + } + } + public final void call(Register src) { prefix(src); 
emitByte(0xFF); @@ -4894,8 +4978,8 @@ public void clflushopt(AMD64Address adr) { emitOperandHelper(7, adr, 0); } - public final void vpand(Register dst, Register nds, Register src) { - VexRVMOp.VPAND.emit(this, AVXSize.YMM, dst, nds, src); + public final void vpand(Register dst, Register nds, Register src, AVXSize size) { + VexRVMOp.VPAND.emit(this, size, dst, nds, src); } public final void vpandn(Register dst, Register nds, Register src) { @@ -4906,16 +4990,16 @@ public final void vpor(Register dst, Register nds, Register src) { VexRVMOp.VPOR.emit(this, AVXSize.YMM, dst, nds, src); } - public final void vptest(Register dst, Register src) { - VexRMOp.VPTEST.emit(this, AVXSize.YMM, dst, src); + public final void vptest(Register dst, Register src, AVXSize size) { + VexRMOp.VPTEST.emit(this, size, dst, src); } - public final void vpxor(Register dst, Register nds, Register src) { - VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src); + public final void vpxor(Register dst, Register nds, Register src, AVXSize size) { + VexRVMOp.VPXOR.emit(this, size, dst, nds, src); } - public final void vpxor(Register dst, Register nds, AMD64Address src) { - VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src); + public final void vpxor(Register dst, Register nds, AMD64Address src, AVXSize size) { + VexRVMOp.VPXOR.emit(this, size, dst, nds, src); } public final void vpsllw(Register dst, Register src, int imm8) { @@ -4926,12 +5010,20 @@ public final void vpsrlw(Register dst, Register src, int imm8) { VexShiftOp.VPSRLW.emit(this, AVXSize.YMM, dst, src, imm8); } - public final void vpslld(Register dst, Register src, int imm8) { - VexShiftOp.VPSLLD.emit(this, AVXSize.YMM, dst, src, imm8); + public final void vpslld(Register dst, Register src, int imm8, AVXSize size) { + VexShiftOp.VPSLLD.emit(this, size, dst, src, imm8); + } + + public final void vpslldq(Register dst, Register src, int imm8, AVXSize size) { + VexShiftImmOp.VPSLLDQ.emit(this, size, dst, src, imm8); + } + + public final 
void vpsrld(Register dst, Register src, int imm8, AVXSize size) { + VexShiftOp.VPSRLD.emit(this, size, dst, src, imm8); } - public final void vpsrld(Register dst, Register src, int imm8) { - VexShiftOp.VPSRLD.emit(this, AVXSize.YMM, dst, src, imm8); + public final void vpsrldq(Register dst, Register src, int imm8, AVXSize size) { + VexShiftImmOp.VPSRLDQ.emit(this, size, dst, src, imm8); } public final void vpcmpeqb(Register dst, Register src1, Register src2) { diff --git a/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64MacroAssembler.java b/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64MacroAssembler.java index 70b15451686b..b95222cc0306 100644 --- a/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64MacroAssembler.java +++ b/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64MacroAssembler.java @@ -600,6 +600,10 @@ public final void testlAndJcc(Register src, int imm32, ConditionFlag cc, Label b applyMIOpAndJcc(AMD64MIOp.TEST, DWORD, src, imm32, cc, branchTarget, isShortJmp, false, null); } + public final void testqAndJcc(Register src, int imm32, ConditionFlag cc, Label branchTarget, boolean isShortJmp) { + applyMIOpAndJcc(AMD64MIOp.TEST, QWORD, src, imm32, cc, branchTarget, isShortJmp, false, null); + } + public final void testAndJcc(OperandSize size, AMD64Address src, int imm32, ConditionFlag cc, Label branchTarget, boolean isShortJmp, IntConsumer applyBeforeFusedPair) { applyMIOpAndJcc(AMD64MIOp.TEST, size, src, imm32, cc, branchTarget, isShortJmp, false, applyBeforeFusedPair); } diff --git a/compiler/src/org.graalvm.compiler.core.aarch64/src/org/graalvm/compiler/core/aarch64/AArch64LIRGenerator.java b/compiler/src/org.graalvm.compiler.core.aarch64/src/org/graalvm/compiler/core/aarch64/AArch64LIRGenerator.java index 753f83349f44..5f44068d7120 100644 --- 
a/compiler/src/org.graalvm.compiler.core.aarch64/src/org/graalvm/compiler/core/aarch64/AArch64LIRGenerator.java +++ b/compiler/src/org.graalvm.compiler.core.aarch64/src/org/graalvm/compiler/core/aarch64/AArch64LIRGenerator.java @@ -70,6 +70,7 @@ import org.graalvm.compiler.lir.aarch64.AArch64ControlFlow.RangeTableSwitchOp; import org.graalvm.compiler.lir.aarch64.AArch64ControlFlow.StrategySwitchOp; import org.graalvm.compiler.lir.aarch64.AArch64EncodeArrayOp; +import org.graalvm.compiler.lir.aarch64.AArch64GHASHProcessBlocksOp; import org.graalvm.compiler.lir.aarch64.AArch64Move; import org.graalvm.compiler.lir.aarch64.AArch64Move.MembarOp; import org.graalvm.compiler.lir.aarch64.AArch64PauseOp; @@ -581,6 +582,11 @@ public void emitAESDecrypt(Value from, Value to, Value key) { append(new AArch64AESDecryptOp(asAllocatable(from), asAllocatable(to), asAllocatable(key), getArrayLengthOffset() - getArrayBaseOffset(JavaKind.Int))); } + @Override + public void emitGHASHProcessBlocks(Value state, Value hashSubkey, Value data, Value blocks) { + append(new AArch64GHASHProcessBlocksOp(this, asAllocatable(state), asAllocatable(hashSubkey), asAllocatable(data), asAllocatable(blocks))); + } + @Override public void emitStringLatin1Inflate(Value src, Value dst, Value len) { append(new AArch64StringLatin1InflateOp(this, asAllocatable(src), asAllocatable(dst), asAllocatable(len))); diff --git a/compiler/src/org.graalvm.compiler.core.amd64/src/org/graalvm/compiler/core/amd64/AMD64LIRGenerator.java b/compiler/src/org.graalvm.compiler.core.amd64/src/org/graalvm/compiler/core/amd64/AMD64LIRGenerator.java index 29e5a4b98a32..f60a851631f0 100644 --- a/compiler/src/org.graalvm.compiler.core.amd64/src/org/graalvm/compiler/core/amd64/AMD64LIRGenerator.java +++ b/compiler/src/org.graalvm.compiler.core.amd64/src/org/graalvm/compiler/core/amd64/AMD64LIRGenerator.java @@ -101,6 +101,7 @@ import org.graalvm.compiler.lir.amd64.AMD64ControlFlow.TestByteBranchOp; import 
org.graalvm.compiler.lir.amd64.AMD64ControlFlow.TestConstBranchOp; import org.graalvm.compiler.lir.amd64.AMD64EncodeArrayOp; +import org.graalvm.compiler.lir.amd64.AMD64GHASHProcessBlocksOp; import org.graalvm.compiler.lir.amd64.AMD64HasNegativesOp; import org.graalvm.compiler.lir.amd64.AMD64LFenceOp; import org.graalvm.compiler.lir.amd64.AMD64Move; @@ -761,6 +762,11 @@ public void emitAESDecrypt(Value from, Value to, Value key) { append(new AMD64AESDecryptOp(this, asAllocatable(from), asAllocatable(to), asAllocatable(key), getArrayLengthOffset() - getArrayBaseOffset(JavaKind.Int))); } + @Override + public void emitGHASHProcessBlocks(Value state, Value hashSubkey, Value data, Value blocks) { + append(new AMD64GHASHProcessBlocksOp(this, asAllocatable(state), asAllocatable(hashSubkey), asAllocatable(data), asAllocatable(blocks))); + } + @SuppressWarnings("unchecked") protected boolean supports(EnumSet runtimeCheckedCPUFeatures, CPUFeature feature) { assert runtimeCheckedCPUFeatures == null || runtimeCheckedCPUFeatures.isEmpty() || runtimeCheckedCPUFeatures.iterator().next() instanceof CPUFeature; diff --git a/compiler/src/org.graalvm.compiler.hotspot.aarch64/src/org/graalvm/compiler/hotspot/aarch64/AArch64HotSpotForeignCallsProvider.java b/compiler/src/org.graalvm.compiler.hotspot.aarch64/src/org/graalvm/compiler/hotspot/aarch64/AArch64HotSpotForeignCallsProvider.java index 99b27bf70112..2a2af70cc3e2 100644 --- a/compiler/src/org.graalvm.compiler.hotspot.aarch64/src/org/graalvm/compiler/hotspot/aarch64/AArch64HotSpotForeignCallsProvider.java +++ b/compiler/src/org.graalvm.compiler.hotspot.aarch64/src/org/graalvm/compiler/hotspot/aarch64/AArch64HotSpotForeignCallsProvider.java @@ -32,6 +32,7 @@ import static org.graalvm.compiler.hotspot.HotSpotForeignCallLinkage.JUMP_ADDRESS; import static org.graalvm.compiler.hotspot.HotSpotForeignCallLinkage.RegisterEffect.COMPUTES_REGISTERS_KILLED; import static 
org.graalvm.compiler.hotspot.HotSpotForeignCallLinkage.RegisterEffect.DESTROYS_ALL_CALLER_SAVE_REGISTERS; +import static org.graalvm.compiler.hotspot.meta.HotSpotForeignCallDescriptor.Reexecutability.NOT_REEXECUTABLE; import static org.graalvm.compiler.hotspot.meta.HotSpotForeignCallDescriptor.Reexecutability.REEXECUTABLE; import static org.graalvm.compiler.hotspot.meta.HotSpotForeignCallDescriptor.Transition.LEAF; @@ -43,9 +44,12 @@ import org.graalvm.compiler.hotspot.meta.HotSpotProviders; import org.graalvm.compiler.hotspot.stubs.IntrinsicStubsGen; import org.graalvm.compiler.options.OptionValues; +import org.graalvm.compiler.replacements.aarch64.AArch64GraphBuilderPlugins; import org.graalvm.compiler.replacements.nodes.ArrayIndexOfForeignCalls; +import org.graalvm.compiler.replacements.nodes.CryptoForeignCalls; import org.graalvm.compiler.word.WordTypes; +import jdk.vm.ci.aarch64.AArch64; import jdk.vm.ci.code.CallingConvention; import jdk.vm.ci.code.CodeCacheProvider; import jdk.vm.ci.code.RegisterValue; @@ -82,6 +86,17 @@ public void initialize(HotSpotProviders providers, OptionValues options) { link(new IntrinsicStubsGen(options, providers, registerStubCall(descriptor.getSignature(), LEAF, REEXECUTABLE, COMPUTES_REGISTERS_KILLED, NO_LOCATIONS))); } + if (AArch64GraphBuilderPlugins.supportsAESPlugins((AArch64) target.arch)) { + for (ForeignCallDescriptor stub : CryptoForeignCalls.AES_STUBS) { + link(new IntrinsicStubsGen(options, providers, registerStubCall(stub.getSignature(), LEAF, NOT_REEXECUTABLE, COMPUTES_REGISTERS_KILLED, stub.getKilledLocations()))); + } + } + + if (AArch64GraphBuilderPlugins.supportsGHASHPlugins((AArch64) target.arch)) { + link(new IntrinsicStubsGen(options, providers, registerStubCall(CryptoForeignCalls.STUB_GHASH_PROCESS_BLOCKS.getSignature(), + LEAF, NOT_REEXECUTABLE, COMPUTES_REGISTERS_KILLED, CryptoForeignCalls.STUB_GHASH_PROCESS_BLOCKS.getKilledLocations()))); + } + super.initialize(providers, options); } diff --git 
a/compiler/src/org.graalvm.compiler.hotspot.amd64/src/org/graalvm/compiler/hotspot/amd64/AMD64HotSpotForeignCallsProvider.java b/compiler/src/org.graalvm.compiler.hotspot.amd64/src/org/graalvm/compiler/hotspot/amd64/AMD64HotSpotForeignCallsProvider.java index 723b56c8fdd9..636bdcf0422d 100644 --- a/compiler/src/org.graalvm.compiler.hotspot.amd64/src/org/graalvm/compiler/hotspot/amd64/AMD64HotSpotForeignCallsProvider.java +++ b/compiler/src/org.graalvm.compiler.hotspot.amd64/src/org/graalvm/compiler/hotspot/amd64/AMD64HotSpotForeignCallsProvider.java @@ -58,14 +58,17 @@ import org.graalvm.compiler.options.OptionValues; import org.graalvm.compiler.replacements.amd64.AMD64ArrayEqualsWithMaskForeignCalls; import org.graalvm.compiler.replacements.amd64.AMD64CalcStringAttributesForeignCalls; +import org.graalvm.compiler.replacements.amd64.AMD64GraphBuilderPlugins; import org.graalvm.compiler.replacements.nodes.ArrayCompareToForeignCalls; import org.graalvm.compiler.replacements.nodes.ArrayCopyWithConversionsForeignCalls; import org.graalvm.compiler.replacements.nodes.ArrayEqualsForeignCalls; import org.graalvm.compiler.replacements.nodes.ArrayIndexOfForeignCalls; import org.graalvm.compiler.replacements.nodes.ArrayRegionCompareToForeignCalls; +import org.graalvm.compiler.replacements.nodes.CryptoForeignCalls; import org.graalvm.compiler.replacements.nodes.VectorizedMismatchForeignCalls; import org.graalvm.compiler.word.WordTypes; +import jdk.vm.ci.amd64.AMD64; import jdk.vm.ci.code.CallingConvention; import jdk.vm.ci.code.CodeCacheProvider; import jdk.vm.ci.code.RegisterValue; @@ -108,6 +111,17 @@ public void initialize(HotSpotProviders providers, OptionValues options) { linkSnippetStubs(providers, options, AMD64HotspotIntrinsicStubsGen::new, AMD64ArrayEqualsWithMaskForeignCalls.STUBS); linkSnippetStubs(providers, options, AMD64HotspotIntrinsicStubsGen::new, AMD64CalcStringAttributesForeignCalls.STUBS); + if (AMD64GraphBuilderPlugins.supportsAESPlugins((AMD64) 
target.arch)) { + for (ForeignCallDescriptor stub : CryptoForeignCalls.AES_STUBS) { + link(new IntrinsicStubsGen(options, providers, registerStubCall(stub.getSignature(), LEAF, NOT_REEXECUTABLE, COMPUTES_REGISTERS_KILLED, stub.getKilledLocations()))); + } + } + + if (AMD64GraphBuilderPlugins.supportsGHASHPlugins((AMD64) target.arch)) { + link(new IntrinsicStubsGen(options, providers, registerStubCall(CryptoForeignCalls.STUB_GHASH_PROCESS_BLOCKS.getSignature(), + LEAF, NOT_REEXECUTABLE, COMPUTES_REGISTERS_KILLED, CryptoForeignCalls.STUB_GHASH_PROCESS_BLOCKS.getKilledLocations()))); + } + super.initialize(providers, options); } diff --git a/compiler/src/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/HotSpotGHASHTest.java b/compiler/src/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/HotSpotGHASHTest.java new file mode 100644 index 000000000000..7bf23fca0c25 --- /dev/null +++ b/compiler/src/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/HotSpotGHASHTest.java @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package org.graalvm.compiler.hotspot.test; + +import java.lang.reflect.Constructor; +import java.lang.reflect.Field; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.nio.ByteBuffer; + +import org.graalvm.compiler.api.test.Graal; +import org.graalvm.compiler.hotspot.meta.UnimplementedGraalIntrinsics; +import org.graalvm.compiler.runtime.RuntimeProvider; +import org.graalvm.compiler.test.AddExports; +import org.junit.Assume; +import org.junit.AssumptionViolatedException; +import org.junit.Before; +import org.junit.Test; + +import jdk.vm.ci.code.Architecture; +import jdk.vm.ci.code.InstalledCode; + +@AddExports("java.base/com.sun.crypto.provider") +public class HotSpotGHASHTest extends HotSpotGraalCompilerTest { + + private Class classGHASH; + private Constructor ghashConstructor; + private Method methodUpdate; + private Method methodDigest; + private Field fieldState; + + @Before + public void init() { + Architecture arch = Graal.getRequiredCapability(RuntimeProvider.class).getHostBackend().getTarget().arch; + Assume.assumeTrue(UnimplementedGraalIntrinsics.supportsGHASHPlugins(arch)); + try { + classGHASH = Class.forName("com.sun.crypto.provider.GHASH"); + ghashConstructor = classGHASH.getDeclaredConstructor(byte[].class); + ghashConstructor.setAccessible(true); + methodUpdate = classGHASH.getDeclaredMethod("update", byte[].class, int.class, int.class); + methodUpdate.setAccessible(true); + methodDigest = classGHASH.getDeclaredMethod("digest"); + methodDigest.setAccessible(true); + fieldState = 
classGHASH.getDeclaredField("state"); + fieldState.setAccessible(true); + } catch (ClassNotFoundException | NoSuchMethodException | NoSuchFieldException e) { + throw new AssumptionViolatedException(e.getMessage()); + } + } + + private static final String HEX_DIGITS = "0123456789abcdef"; + + private static byte[] bytes(String hex) { + StringBuilder sb = new StringBuilder(hex); + while ((sb.length() % 32) != 0) { + sb.append('0'); + } + String newHex = sb.toString(); + + byte[] result = new byte[newHex.length() / 2]; + for (int i = 0; i < result.length; ++i) { + int a = HEX_DIGITS.indexOf(newHex.charAt(2 * i)); + int b = HEX_DIGITS.indexOf(newHex.charAt(2 * i + 1)); + if ((a | b) < 0) { + if (a < 0) { + throw new AssertionError("bad character " + (int) newHex.charAt(2 * i)); + } + throw new AssertionError("bad character " + (int) newHex.charAt(2 * i + 1)); + } + result[i] = (byte) ((a << 4) | b); + } + return result; + } + + private static byte[] bytes(long l0, long l1) { + return ByteBuffer.allocate(16).putLong(l0).putLong(l1).array(); + } + + private Result ghash(Object ghash, long[] initState, byte[]... 
inputs) { + try { + long[] state = (long[]) fieldState.get(ghash); + System.arraycopy(initState, 0, state, 0, 2); + for (byte[] input : inputs) { + methodUpdate.invoke(ghash, input, 0, input.length); + } + return new Result(methodDigest.invoke(ghash), null); + } catch (Exception e) { + return new Result(null, e); + } + } + + private void testMultipleUpdateHelper(Object ghash, String strA, String strC, String result) { + long[] state = new long[]{0, 0}; + byte[][] inputs = new byte[][]{bytes(strA), bytes(strC), bytes(strA.length() * 4, strC.length() * 4)}; + assertTrue(result.length() == 32); + Result expected = new Result(bytes(result), null); + InstalledCode intrinsic = compileAndInstallSubstitution(classGHASH, "processBlocks"); + Result actual = ghash(ghash, state, inputs); + assertEquals(expected, actual); + intrinsic.invalidate(); + } + + @Test + public void testMultipleUpdate() throws InvocationTargetException, InstantiationException, IllegalAccessException { + Object ghash = ghashConstructor.newInstance(bytes("66e94bd4ef8a2c3b884cfa59ca342b2e")); + testMultipleUpdateHelper(ghash, "", "", "00000000000000000000000000000000"); + testMultipleUpdateHelper(ghash, "", "0388dace60b6a392f328c2b971b2fe78", "f38cbb1ad69223dcc3457ae5b6b0f885"); + + ghash = ghashConstructor.newInstance(bytes("b83b533708bf535d0aa6e52980d53b78")); + testMultipleUpdateHelper(ghash, + "", + "42831ec2217774244b7221b784d0d49c" + "e3aa212f2c02a4e035c17e2329aca12e" + "21d514b25466931c7d8f6a5aac84aa05" + "1ba30b396a0aac973d58e091473f5985", + "7f1b32b81b820d02614f8895ac1d4eac"); + testMultipleUpdateHelper(ghash, "feedfacedeadbeeffeedfacedeadbeef" + "abaddad2", + "42831ec2217774244b7221b784d0d49c" + "e3aa212f2c02a4e035c17e2329aca12e" + "21d514b25466931c7d8f6a5aac84aa05" + "1ba30b396a0aac973d58e091", + "698e57f70e6ecc7fd9463b7260a9ae5f"); + testMultipleUpdateHelper(ghash, + "feedfacedeadbeeffeedfacedeadbeef" + "abaddad2", + "61353b4c2806934a777ff51fa22a4755" + "699b2a714fcdc6f83766e5f97b6c7423" + 
"73806900e49f24b22b097544d4896b42" + "4989b5e1ebac0f07c23f4598", + "df586bb4c249b92cb6922877e444d37b"); + } + + private Result ghash(Object ghash, long[] initState, byte[] input, int inOff, int inLen) { + try { + long[] state = (long[]) fieldState.get(ghash); + System.arraycopy(initState, 0, state, 0, 2); + methodUpdate.invoke(ghash, input, inOff, inLen); + return new Result(methodDigest.invoke(ghash), null); + } catch (Exception e) { + return new Result(null, e); + } + } + + private void testGHASH(Object ghash, long[] initState, byte[] input, int inOff, int inLen) { + Result expected = ghash(ghash, initState, input, inOff, inLen); + InstalledCode intrinsic = compileAndInstallSubstitution(classGHASH, "processBlocks"); + Result actual = ghash(ghash, initState, input, inOff, inLen); + assertEquals(expected, actual); + intrinsic.invalidate(); + } + + @Test + public void testOffset() throws InvocationTargetException, InstantiationException, IllegalAccessException { + Object ghash = ghashConstructor.newInstance(bytes(-2549203865593441186L, -7934336365809252297L)); + + byte[] input = new byte[]{23, 3, 3, 0, 65, 112, -106, -54, 49, -74, -104, -65, -27, 85, 53, 64, 68, 112, -1, -91, 65, -93, -102, 126, 106, 24, -38, 10, 11, 110, -85, -123, -99, 121, 1, -100, + 6, -52, 17, -46, 50, -75, 69, 11, -119, -109, 60, -69, -125, -83, 79, 93, -88, 24, -28, 111, 39, -105, -13, -14, -5, -5, 65, 57, 6, -112, -96, 75, 28, 42, 64, 95, -5, -40, -64, + -83, -6, -30, -42, 108, 64, 3, -48, 62, 100, 89, 108, -39, 96, 86, -15, -11, 115, -96, -96, 122, 9, -102, 63, 9, 4, 88, -106, -77, 91, -54, 98, 22, -91, 70, 75, 23, -93, -87, + 107, -96, 32, -59, 5, -70, 61, -80, 76, -113, -115, -118, 36, -119, 32, -4, 14, 83, 18, -19, 17, 19, 57, -29, -40, 94, 13, -112, 103, 102, -96, 9, -81, -10, 91, 19, 2, 41, 108, + -95, 44, -98, 47, -60, 97, 27, 39, -61, 117, 42, -96, -45, 75, 115, -87, -85, -39, 14, -75, -111, -102, 76, -58, -35, -126, -122, -8, -55, 81, 56, -40, -16, 84, -93, 58, -44, + -60, 56, -17, 
-96, -83, -71, 86, -59, 111, -43, -7, 84, -58, -18, -109, -22, 6, -99, -92, -33, 9, 98, 8, -2, 47, -102, 53, 124, -85, 33, 60, -108, -102, -88, -33, 50, 96, -115, + 14, 46, 36, 88, -61, -118, 72, 57, 13, 27, 40, 93, 44, 110, 114, -83, 126, -21, 113, -15, -16, -103, -51, 118, 12, -9, -121, -108, 19, 5, 20, -122, -29, 35, 31, -50, -81, 85, + 57, -82, 25, 78, -24, -102, 74, -97, 107, -22, -92, 104, -76, 77, 37, -49, -114, -100, 122, -80, 79, -48, -119, 67, 72, 88, -12, 103, 107, 5, -14, -1, 56, -66, -102, 15, -72, + 41, 41, -74, -9, -56, 12, -68, -120, 43, -44, -85, -45, 79, -84, -58, -81, 97, 10, 2, 60, 1, -103, -10, -98, 123, 6, -65, 17, -46, -58, -41, 103, -24, -119, -89, -93, -115, -3, + -55, 38, -119, -88, 83, -36, 29, 28, -66, -121, 9, -32, -7, 112, 19, -58, -2, -119, -20, -9, 25, 36, -120, -10, -75, 80, 34, -29, 126, -105, -37, -28, 57, 66, 127, 118, 12, 53, + -9, -31, -33, 7, -82, 80, -60, -10, -17, -17, 94, 63, 46, 77, 71, 8, 85, -113, -33, -16, -68, 37, 64, -21, -91, 116, -125, -41, -43, 1, -89, 6, -53, -105, 47, -5, 59, 71, -115, + 108, 30, 125, 16, 52, 7, 87, -29, 111, 126, -42, 48, 114, 80, 54, 85, -45, 52, 37, -63, -59, 81, 55, 83, 67, -11, 68, -57, 91, -38, -40, 113, -25, 89, 86, -44, 53, -84, -48, + -120, -38, 21, -29, 103, -53, 32, -122, -32, -11, 20, 55, -32, -91, 99, -98, -45, -5, -94, 107, 120, 66, 90, -64, -7, 103, 122, -33, 44, -91, -80, -1, -98, 99, -71, 120, 10, + -114, 43, 58, -11, -69, -55, 65, -17, -113, -37, -51, 39, -117, 60, 3, -76, 87, 90, -27, 85, -82, -6, 89, -40, 77, -14, -124, 29, -9, 122, -97, 119, -126, 84, 116, 28, -45, + -50, 74, 107, 8, 8, 101, -124, 5, 56, 4, -125, 100, -4, -100, -11, -65, -8, -110, -27, 0, -106, -37, 29, 91, 35, 80, 88, 64, 117, -128, -91, -117, 5, -36, -27, -108, 29, 3, + 115, 95, -69, -53, -20, -122, 39, -21, -29, -128, -58, -94, -78, -100, -4, -58, -12, 104, -96, -98, -9, 0, 64, -7, 72, -127, -86, 76, 57, -36, -86, 39, -100, -126, -71, 13, + 116, -106, -71, -6, 66, -67, -85, -90, 92, 99, -47, -101, 16, -52, 
-90, -1, 84, -112, -36, -112, 114, -3, -126, 29, -121, 68, -37, -118, 7, -91, -50, -33, 23, -113, 68, -66, + -27, -30, -20, -78, 8, 43, -27, -62, -74, 22, 1, -53, 28, 114, -8, 54, -14, 120, 118, -70, -112, -23, 19, -2, 21, 126, -44, 20, -43, 75, 27, -92, 2, -84, 48, 108, 101, 39, 35, + -93, 16, 62, -58, -20, -24, 44, -109, 110, 95, -68, 73, -82, -125, -99, 26, -88, 16, -48, -125, 44, -68, -122, 57, 111, 8, 0, 43, 107, 122, 78, 57, -22, -77, 83, 115, 107, -87, + 112, 91, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + long[] state = new long[]{-2224758530180934284L, 2670573948063642579L}; + testGHASH(ghash, state, input, 5, input.length - 5); + } +} diff --git a/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/HotSpotGraphBuilderPlugins.java b/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/HotSpotGraphBuilderPlugins.java index b99bf245ee70..1bc097d3db50 100644 --- a/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/HotSpotGraphBuilderPlugins.java +++ b/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/HotSpotGraphBuilderPlugins.java @@ -35,7 +35,6 @@ import static org.graalvm.compiler.hotspot.HotSpotBackend.CRC_TABLE_LOCATION; import static org.graalvm.compiler.hotspot.HotSpotBackend.ELECTRONIC_CODEBOOK_DECRYPT_AESCRYPT; import static org.graalvm.compiler.hotspot.HotSpotBackend.ELECTRONIC_CODEBOOK_ENCRYPT_AESCRYPT; -import static org.graalvm.compiler.hotspot.HotSpotBackend.GHASH_PROCESS_BLOCKS; import static org.graalvm.compiler.hotspot.HotSpotBackend.UPDATE_BYTES_CRC32; import static org.graalvm.compiler.hotspot.HotSpotBackend.UPDATE_BYTES_CRC32C; import static org.graalvm.compiler.hotspot.meta.HotSpotGraphBuilderPlugins.CipherBlockChainingCryptPlugin.readAESCryptKArrayStart; @@ -227,7 +226,6 @@ public void run() { registerBigIntegerPlugins(invocationPlugins, config, replacements); 
registerSHAPlugins(invocationPlugins, config, replacements); registerMD5Plugins(invocationPlugins, config, replacements); - registerGHASHPlugins(invocationPlugins, config, metaAccess, replacements); registerBase64Plugins(invocationPlugins, config, metaAccess, replacements); registerUnsafePlugins(invocationPlugins, config, replacements); StandardGraphBuilderPlugins.registerInvocationPlugins(snippetReflection, invocationPlugins, replacements, true, false, true, graalRuntime.getHostProviders().getLowerer()); @@ -942,24 +940,6 @@ private static void registerMD5Plugins(InvocationPlugins plugins, GraalHotSpotVM r.registerConditional(config.md5ImplCompress != 0L, new DigestInvocationPlugin(HotSpotBackend.MD5_IMPL_COMPRESS)); } - private static void registerGHASHPlugins(InvocationPlugins plugins, GraalHotSpotVMConfig config, MetaAccessProvider metaAccess, Replacements replacements) { - Registration r = new Registration(plugins, "com.sun.crypto.provider.GHASH", replacements); - r.registerConditional(config.useGHASHIntrinsics(), new InvocationPlugin("processBlocks", byte[].class, int.class, int.class, long[].class, long[].class) { - @Override - public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Receiver receiver, - ValueNode data, ValueNode inOffset, ValueNode blocks, ValueNode state, ValueNode hashSubkey) { - int longArrayBaseOffset = metaAccess.getArrayBaseOffset(JavaKind.Long); - int byteArrayBaseOffset = metaAccess.getArrayBaseOffset(JavaKind.Byte); - ValueNode dataOffset = AddNode.create(ConstantNode.forInt(byteArrayBaseOffset), inOffset, NodeView.DEFAULT); - ComputeObjectAddressNode dataAddress = b.add(new ComputeObjectAddressNode(data, dataOffset)); - ComputeObjectAddressNode stateAddress = b.add(new ComputeObjectAddressNode(state, ConstantNode.forInt(longArrayBaseOffset))); - ComputeObjectAddressNode hashSubkeyAddress = b.add(new ComputeObjectAddressNode(hashSubkey, ConstantNode.forInt(longArrayBaseOffset))); - b.add(new 
ForeignCallNode(GHASH_PROCESS_BLOCKS, stateAddress, hashSubkeyAddress, dataAddress, blocks)); - return true; - } - }); - } - private static void registerBase64Plugins(InvocationPlugins plugins, GraalHotSpotVMConfig config, MetaAccessProvider metaAccess, Replacements replacements) { Registration r = new Registration(plugins, "java.util.Base64$Encoder", replacements); r.registerConditional(config.base64EncodeBlock != 0L, new InvocationPlugin("encodeBlock", Receiver.class, byte[].class, int.class, int.class, byte[].class, int.class, boolean.class) { diff --git a/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/HotSpotHostForeignCallsProvider.java b/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/HotSpotHostForeignCallsProvider.java index b068e6c537a8..ba3069a5725f 100644 --- a/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/HotSpotHostForeignCallsProvider.java +++ b/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/HotSpotHostForeignCallsProvider.java @@ -38,7 +38,6 @@ import static org.graalvm.compiler.hotspot.HotSpotBackend.ELECTRONIC_CODEBOOK_DECRYPT_AESCRYPT; import static org.graalvm.compiler.hotspot.HotSpotBackend.ELECTRONIC_CODEBOOK_ENCRYPT_AESCRYPT; import static org.graalvm.compiler.hotspot.HotSpotBackend.EXCEPTION_HANDLER; -import static org.graalvm.compiler.hotspot.HotSpotBackend.GHASH_PROCESS_BLOCKS; import static org.graalvm.compiler.hotspot.HotSpotBackend.IC_MISS_HANDLER; import static org.graalvm.compiler.hotspot.HotSpotBackend.MD5_IMPL_COMPRESS; import static org.graalvm.compiler.hotspot.HotSpotBackend.MD5_IMPL_COMPRESS_MB; @@ -66,7 +65,6 @@ import static org.graalvm.compiler.hotspot.HotSpotBackend.UPDATE_BYTES_CRC32; import static org.graalvm.compiler.hotspot.HotSpotBackend.UPDATE_BYTES_CRC32C; import static org.graalvm.compiler.hotspot.HotSpotBackend.VM_ERROR; -import static 
org.graalvm.compiler.hotspot.HotSpotForeignCallLinkage.RegisterEffect.COMPUTES_REGISTERS_KILLED; import static org.graalvm.compiler.hotspot.HotSpotForeignCallLinkage.RegisterEffect.DESTROYS_ALL_CALLER_SAVE_REGISTERS; import static org.graalvm.compiler.hotspot.HotSpotHostBackend.DEOPT_BLOB_UNCOMMON_TRAP; import static org.graalvm.compiler.hotspot.HotSpotHostBackend.DEOPT_BLOB_UNPACK; @@ -119,7 +117,6 @@ import org.graalvm.compiler.hotspot.stubs.ExceptionHandlerStub; import org.graalvm.compiler.hotspot.stubs.IllegalArgumentExceptionArgumentIsNotAnArrayStub; import org.graalvm.compiler.hotspot.stubs.IntegerExactOverflowExceptionStub; -import org.graalvm.compiler.hotspot.stubs.IntrinsicStubsGen; import org.graalvm.compiler.hotspot.stubs.LongExactOverflowExceptionStub; import org.graalvm.compiler.hotspot.stubs.NegativeArraySizeExceptionStub; import org.graalvm.compiler.hotspot.stubs.NullPointerExceptionStub; @@ -132,7 +129,6 @@ import org.graalvm.compiler.options.OptionValues; import org.graalvm.compiler.replacements.SnippetTemplate; import org.graalvm.compiler.replacements.arraycopy.ArrayCopyForeignCalls; -import org.graalvm.compiler.replacements.nodes.CryptoForeignCalls; import org.graalvm.compiler.word.Word; import org.graalvm.compiler.word.WordTypes; import org.graalvm.word.LocationIdentity; @@ -530,9 +526,6 @@ public void initialize(HotSpotProviders providers, OptionValues options) { if (c.sha3ImplCompressMultiBlock != 0L) { registerForeignCall(SHA3_IMPL_COMPRESS_MB, c.sha3ImplCompressMultiBlock, NativeCall); } - if (c.useGHASHIntrinsics()) { - registerForeignCall(GHASH_PROCESS_BLOCKS, c.ghashProcessBlocks, NativeCall); - } if (c.base64EncodeBlock != 0L) { registerForeignCall(BASE64_ENCODE_BLOCK, c.base64EncodeBlock, NativeCall); } @@ -574,10 +567,6 @@ public void initialize(HotSpotProviders providers, OptionValues options) { registerForeignCall(ELECTRONIC_CODEBOOK_DECRYPT_AESCRYPT, c.electronicCodeBookDecrypt, NativeCall); } - for (ForeignCallDescriptor stub : 
CryptoForeignCalls.STUBS) { - link(new IntrinsicStubsGen(options, providers, registerStubCall(stub.getSignature(), LEAF, NOT_REEXECUTABLE, COMPUTES_REGISTERS_KILLED, stub.getKilledLocations()))); - } - if (c.useAESIntrinsics) { try { // These stubs do callee saving diff --git a/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/UnimplementedGraalIntrinsics.java b/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/UnimplementedGraalIntrinsics.java index 9b04482f7238..aa0dbd8db723 100644 --- a/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/UnimplementedGraalIntrinsics.java +++ b/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/UnimplementedGraalIntrinsics.java @@ -33,6 +33,8 @@ import org.graalvm.compiler.debug.GraalError; import org.graalvm.compiler.hotspot.GraalHotSpotVMConfig; import org.graalvm.compiler.nodes.graphbuilderconf.InvocationPlugin; +import org.graalvm.compiler.replacements.aarch64.AArch64GraphBuilderPlugins; +import org.graalvm.compiler.replacements.amd64.AMD64GraphBuilderPlugins; import org.graalvm.compiler.serviceprovider.JavaVersionUtil; import jdk.vm.ci.aarch64.AArch64; @@ -213,7 +215,7 @@ public UnimplementedGraalIntrinsics(GraalHotSpotVMConfig config, Architecture ar add(ignore, "com/sun/crypto/provider/CounterMode.implCrypt([BII[BI)I"); } - if (!config.useGHASHIntrinsics()) { + if (!supportsGHASHPlugins(arch)) { add(ignore, "com/sun/crypto/provider/GHASH.processBlocks([BII[J[J)V"); } @@ -239,10 +241,14 @@ public UnimplementedGraalIntrinsics(GraalHotSpotVMConfig config, Architecture ar } // AES intrinsics - if (!config.useAESIntrinsics) { + if (!supportsAESPlugins(arch)) { add(ignore, "com/sun/crypto/provider/AESCrypt.implDecryptBlock([BI[BI)V", - "com/sun/crypto/provider/AESCrypt.implEncryptBlock([BI[BI)V", + "com/sun/crypto/provider/AESCrypt.implEncryptBlock([BI[BI)V"); + } + + if (!config.useAESIntrinsics) { + add(ignore, 
"com/sun/crypto/provider/CipherBlockChaining.implDecrypt([BII[BI)I", "com/sun/crypto/provider/CipherBlockChaining.implEncrypt([BII[BI)I"); } @@ -493,4 +499,22 @@ public boolean isMissing(String method) { public boolean isDocumented(String method) { return isIgnored(method) || isImplementedInEnterprise(method) || isMissing(method) || isIgnored(method); } + + public static boolean supportsAESPlugins(Architecture arch) { + if (arch instanceof AMD64) { + return AMD64GraphBuilderPlugins.supportsAESPlugins((AMD64) arch); + } else if (arch instanceof AArch64) { + return AArch64GraphBuilderPlugins.supportsAESPlugins((AArch64) arch); + } + return false; + } + + public static boolean supportsGHASHPlugins(Architecture arch) { + if (arch instanceof AMD64) { + return AMD64GraphBuilderPlugins.supportsGHASHPlugins((AMD64) arch); + } else if (arch instanceof AArch64) { + return AArch64GraphBuilderPlugins.supportsGHASHPlugins((AArch64) arch); + } + return false; + } } diff --git a/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/stubs/IntrinsicStubs.java b/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/stubs/IntrinsicStubs.java index d80d3d50909a..0c9b687e6a04 100644 --- a/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/stubs/IntrinsicStubs.java +++ b/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/stubs/IntrinsicStubs.java @@ -32,6 +32,7 @@ import org.graalvm.compiler.replacements.nodes.ArrayIndexOfNode; import org.graalvm.compiler.replacements.nodes.ArrayRegionCompareToNode; import org.graalvm.compiler.replacements.nodes.ArrayRegionEqualsNode; +import org.graalvm.compiler.replacements.nodes.GHASHProcessBlocksNode; import org.graalvm.compiler.replacements.nodes.VectorizedMismatchNode; @GeneratedStubsHolder(targetVM = "hotspot", sources = { @@ -43,6 +44,7 @@ ArrayCopyWithConversionsNode.class, VectorizedMismatchNode.class, AESNode.class, + GHASHProcessBlocksNode.class, }) 
public final class IntrinsicStubs { } diff --git a/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESDecryptOp.java b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESDecryptOp.java index b7367203a06d..5faf24c61d51 100644 --- a/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESDecryptOp.java +++ b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESDecryptOp.java @@ -83,7 +83,14 @@ public AArch64AESDecryptOp(Value fromValue, Value toValue, Value keyValue, int l this.toValue = toValue; this.keyValue = keyValue; this.lengthOffset = lengthOffset; - this.temps = new Value[]{v0.asValue(), v1.asValue(), v2.asValue(), v3.asValue(), v4.asValue(), v5.asValue()}; + this.temps = new Value[]{ + v0.asValue(), + v1.asValue(), + v2.asValue(), + v3.asValue(), + v4.asValue(), + v5.asValue(), + }; } @Override diff --git a/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESEncryptOp.java b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESEncryptOp.java index 86e451ddff60..414c1ddb2f19 100644 --- a/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESEncryptOp.java +++ b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESEncryptOp.java @@ -24,6 +24,7 @@ */ package org.graalvm.compiler.lir.aarch64; +import static jdk.vm.ci.aarch64.AArch64.SIMD; import static jdk.vm.ci.aarch64.AArch64.v0; import static jdk.vm.ci.aarch64.AArch64.v17; import static jdk.vm.ci.aarch64.AArch64.v18; @@ -94,9 +95,24 @@ public AArch64AESEncryptOp(Value fromValue, Value toValue, Value keyValue, int l this.toValue = toValue; this.keyValue = keyValue; this.lengthOffset = lengthOffset; - this.temps = new Value[]{v0.asValue(), v17.asValue(), v18.asValue(), v19.asValue(), 
v20.asValue(), v21.asValue(), - v22.asValue(), v23.asValue(), v24.asValue(), v25.asValue(), v26.asValue(), v27.asValue(), - v28.asValue(), v29.asValue(), v30.asValue(), v31.asValue()}; + this.temps = new Value[]{ + v0.asValue(), + v17.asValue(), + v18.asValue(), + v19.asValue(), + v20.asValue(), + v21.asValue(), + v22.asValue(), + v23.asValue(), + v24.asValue(), + v25.asValue(), + v26.asValue(), + v27.asValue(), + v28.asValue(), + v29.asValue(), + v30.asValue(), + v31.asValue(), + }; } @Override @@ -117,11 +133,25 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) { // Increments from, to // Input data in v0, v1, ... // unrolls controls the number of times to unroll the generated function - new AESKernelGenerator(masm, 1, from, to, keylen, 0, 17).unroll(); + new AESKernelGenerator(masm, 1, from, to, keylen, v0, v17).unroll(); } } - private static void aesencLoadkeys(AArch64MacroAssembler masm, Register key, Register keylen) { + private static int indexOf(Register reg) { + assert SIMD.equals(reg.getRegisterCategory()); + for (int i = 0; i < AArch64.simdRegisters.size(); i++) { + if (reg.equals(AArch64.simdRegisters.get(i))) { + return i; + } + } + throw GraalError.shouldNotReachHere("unknown register "); + } + + static Register offset(Register base, int offset) { + return AArch64.simdRegisters.get(indexOf(base) + offset); + } + + static void aesencLoadkeys(AArch64MacroAssembler masm, Register key, Register keylen) { Label loadkeys44 = new Label(); Label loadkeys52 = new Label(); @@ -170,10 +200,10 @@ private static void aesencLoadkeys(AArch64MacroAssembler masm, Register key, Reg * {@link #generate(int)}, {@link #length()}, and {@link #next()} to generate unrolled and * interleaved functions. 
*/ - public abstract static class KernelGenerator { + abstract static class KernelGenerator { protected final int unrolls; - public KernelGenerator(int unrolls) { + KernelGenerator(int unrolls) { this.unrolls = unrolls; } @@ -199,26 +229,26 @@ public void unroll() { } /** An unrolled and interleaved generator for AES encryption. */ - public static class AESKernelGenerator extends KernelGenerator { + static final class AESKernelGenerator extends KernelGenerator { private final AArch64MacroAssembler masm; private final Register from; private final Register to; private final Register keylen; - private final int data; - private final int subkeys; + private final Register data; + private final Register subkeys; private final boolean once; private final Label rounds44; private final Label rounds52; - public AESKernelGenerator(AArch64MacroAssembler masm, + AESKernelGenerator(AArch64MacroAssembler masm, int unrolls, Register from, Register to, Register keylen, - int data, - int subkeys, + Register data, + Register subkeys, boolean once) { super(unrolls); this.masm = masm; @@ -232,13 +262,13 @@ public AESKernelGenerator(AArch64MacroAssembler masm, this.rounds52 = new Label(); } - public AESKernelGenerator(AArch64MacroAssembler masm, + AESKernelGenerator(AArch64MacroAssembler masm, int unrolls, Register from, Register to, Register keylen, - int data, - int subkeys) { + Register data, + Register subkeys) { this(masm, unrolls, from, @@ -249,13 +279,9 @@ public AESKernelGenerator(AArch64MacroAssembler masm, true); } - private static Register getSimdRegister(int index) { - return AArch64.simdRegisters.get(index); - } - - private void aesRound(int input, int subkey) { - masm.neon.aese(getSimdRegister(input), getSimdRegister(subkey)); - masm.neon.aesmc(getSimdRegister(input), getSimdRegister(input)); + private void aesRound(Register input, Register subkey) { + masm.neon.aese(input, subkey); + masm.neon.aesmc(input, input); } @Override @@ -264,7 +290,7 @@ public void generate(int 
index) { case 0: if (!from.equals(Register.None)) { // get 16 bytes of input - masm.fldr(128, getSimdRegister(data), AArch64Address.createBaseRegisterOnlyAddress(128, from)); + masm.fldr(128, data, AArch64Address.createBaseRegisterOnlyAddress(128, from)); } break; case 1: @@ -275,10 +301,10 @@ public void generate(int index) { } break; case 2: - aesRound(data, subkeys + 0); + aesRound(data, offset(subkeys, 0)); break; case 3: - aesRound(data, subkeys + 1); + aesRound(data, offset(subkeys, 1)); break; case 4: if (once) { @@ -286,10 +312,10 @@ public void generate(int index) { } break; case 5: - aesRound(data, subkeys + 2); + aesRound(data, offset(subkeys, 2)); break; case 6: - aesRound(data, subkeys + 3); + aesRound(data, offset(subkeys, 3)); break; case 7: if (once) { @@ -297,41 +323,41 @@ public void generate(int index) { } break; case 8: - aesRound(data, subkeys + 4); + aesRound(data, offset(subkeys, 4)); break; case 9: - aesRound(data, subkeys + 5); + aesRound(data, offset(subkeys, 5)); break; case 10: - aesRound(data, subkeys + 6); + aesRound(data, offset(subkeys, 6)); break; case 11: - aesRound(data, subkeys + 7); + aesRound(data, offset(subkeys, 7)); break; case 12: - aesRound(data, subkeys + 8); + aesRound(data, offset(subkeys, 8)); break; case 13: - aesRound(data, subkeys + 9); + aesRound(data, offset(subkeys, 9)); break; case 14: - aesRound(data, subkeys + 10); + aesRound(data, offset(subkeys, 10)); break; case 15: - aesRound(data, subkeys + 11); + aesRound(data, offset(subkeys, 11)); break; case 16: - aesRound(data, subkeys + 12); + aesRound(data, offset(subkeys, 12)); break; case 17: - masm.neon.aese(getSimdRegister(data), getSimdRegister(subkeys + 13)); + masm.neon.aese(data, offset(subkeys, 13)); break; case 18: - masm.neon.eorVVV(ASIMDSize.FullReg, getSimdRegister(data), getSimdRegister(data), getSimdRegister(subkeys + 14)); + masm.neon.eorVVV(ASIMDSize.FullReg, data, data, offset(subkeys, 14)); break; case 19: if (!to.equals(Register.None)) { - 
masm.fstr(128, getSimdRegister(data), AArch64Address.createBaseRegisterOnlyAddress(128, to)); + masm.fstr(128, data, AArch64Address.createBaseRegisterOnlyAddress(128, to)); } break; default: @@ -346,7 +372,7 @@ public KernelGenerator next() { from, to, keylen, - data + 1, + offset(data, 1), subkeys, false); } diff --git a/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64GHASHProcessBlocksOp.java b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64GHASHProcessBlocksOp.java new file mode 100644 index 000000000000..1aba5310a1fc --- /dev/null +++ b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64GHASHProcessBlocksOp.java @@ -0,0 +1,773 @@ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ +package org.graalvm.compiler.lir.aarch64; + +import static jdk.vm.ci.aarch64.AArch64.v0; +import static jdk.vm.ci.aarch64.AArch64.v1; +import static jdk.vm.ci.aarch64.AArch64.v2; +import static jdk.vm.ci.aarch64.AArch64.v24; +import static jdk.vm.ci.aarch64.AArch64.v28; +import static jdk.vm.ci.aarch64.AArch64.v29; +import static jdk.vm.ci.aarch64.AArch64.v3; +import static jdk.vm.ci.aarch64.AArch64.v30; +import static jdk.vm.ci.aarch64.AArch64.v31; +import static jdk.vm.ci.aarch64.AArch64.v4; +import static jdk.vm.ci.aarch64.AArch64.v5; +import static jdk.vm.ci.aarch64.AArch64.v6; +import static jdk.vm.ci.aarch64.AArch64.v7; +import static jdk.vm.ci.code.ValueUtil.asRegister; +import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_PAIR_SIGNED_SCALED; +import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED; +import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_SIGNED_UNSCALED; +import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; +import static org.graalvm.compiler.lir.aarch64.AArch64AESEncryptOp.offset; + +import java.util.Arrays; + +import org.graalvm.compiler.asm.Label; +import org.graalvm.compiler.asm.aarch64.AArch64ASIMDAssembler.ASIMDSize; +import org.graalvm.compiler.asm.aarch64.AArch64ASIMDAssembler.ElementSize; +import org.graalvm.compiler.asm.aarch64.AArch64Address; +import org.graalvm.compiler.asm.aarch64.AArch64Assembler.ConditionFlag; +import org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler; +import org.graalvm.compiler.debug.GraalError; +import org.graalvm.compiler.lir.LIRInstructionClass; +import org.graalvm.compiler.lir.StubPort; +import org.graalvm.compiler.lir.asm.CompilationResultBuilder; +import org.graalvm.compiler.lir.gen.LIRGeneratorTool; + +import jdk.vm.ci.aarch64.AArch64; +import jdk.vm.ci.code.Register; +import jdk.vm.ci.meta.AllocatableValue; +import jdk.vm.ci.meta.Value; + +// @formatter:off 
+@StubPort(path = "src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp", + lineStart = 5823, + lineEnd = 5957, + commit = "27af0144ea57e86d9b81c2b328fad66e4a046f61", + sha1 = "f11f84b57df21c9b49473f204e11efc0e6da53d0") +@StubPort(path = "src/hotspot/cpu/aarch64/macroAssembler_aarch64_aes.cpp", + lineStart = 285, + lineEnd = 680, + commit = "27af0144ea57e86d9b81c2b328fad66e4a046f61", + sha1 = "087f57262da406b3d20e61d03eab5e9303dfba4c") +// @formatter:on +public final class AArch64GHASHProcessBlocksOp extends AArch64LIRInstruction { + + public static final LIRInstructionClass TYPE = LIRInstructionClass.create(AArch64GHASHProcessBlocksOp.class); + + private static final int REGISTER_STRIDE = 7; + + @Alive({REG}) private Value stateValue; + @Alive({REG}) private Value htblValue; + @Alive({REG}) private Value originalDataValue; + @Alive({REG}) private Value originalBlocksValue; + + @Temp({REG}) private Value dataValue; + @Temp({REG}) private Value blocksValue; + + @Temp protected Value[] temps; + + public AArch64GHASHProcessBlocksOp(LIRGeneratorTool tool, AllocatableValue stateValue, AllocatableValue htblValue, AllocatableValue originalDataValue, AllocatableValue originalBlocksValue) { + super(TYPE); + + this.stateValue = stateValue; + this.htblValue = htblValue; + this.originalDataValue = originalDataValue; + this.originalBlocksValue = originalBlocksValue; + + this.dataValue = tool.newVariable(originalDataValue.getValueKind()); + this.blocksValue = tool.newVariable(originalBlocksValue.getValueKind()); + + this.temps = Arrays.stream(AArch64.simdRegisters.toArray()).map(Register::asValue).toArray(Value[]::new); + } + + @Override + public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) { + Label labelSmall = new Label(); + Label labelDone = new Label(); + + Register state = asRegister(stateValue); + Register subkeyH = asRegister(htblValue); + Register originalData = asRegister(originalDataValue); + Register originalBlocks = 
asRegister(originalBlocksValue); + + Register data = asRegister(dataValue); + Register blocks = asRegister(blocksValue); + + masm.mov(64, data, originalData); + masm.mov(64, blocks, originalBlocks); + + masm.compare(32, blocks, 8); + masm.branchConditionally(ConditionFlag.LT, labelSmall); + + // No need to save/restore states as we already mark all SIMD registers as killed. + // masm.sub(64, sp, sp, 4 * 16); + // masm.neon.st1MultipleVVVV(ASIMDSize.FullReg, ElementSize.Byte, v12, v13, v14, v15, + // AArch64Address.createBaseRegisterOnlyAddress(AArch64Address.ANY_SIZE, sp)); + // masm.sub(64, sp, sp, 4 * 16); + // masm.neon.st1MultipleVVVV(ASIMDSize.FullReg, ElementSize.Byte, v8, v9, v10, v11, + // AArch64Address.createBaseRegisterOnlyAddress(AArch64Address.ANY_SIZE, sp)); + + ghashProcessBlocksWide(masm, state, subkeyH, data, blocks, 4); + + // masm.neon.ld1MultipleVVVV(ASIMDSize.FullReg, ElementSize.Byte, v8, v9, v10, v11, + // AArch64Address.createStructureImmediatePostIndexAddress(ASIMDInstruction.LD1_MULTIPLE_4R, + // ASIMDSize.FullReg, ElementSize.Byte, sp, 64)); + // masm.neon.ld1MultipleVVVV(ASIMDSize.FullReg, ElementSize.Byte, v12, v13, v14, v15, + // AArch64Address.createStructureImmediatePostIndexAddress(ASIMDInstruction.LD1_MULTIPLE_4R, + // ASIMDSize.FullReg, ElementSize.Byte, sp, 64)); + + masm.compare(32, blocks, 0); + masm.branchConditionally(ConditionFlag.LE, labelDone); + + masm.bind(labelSmall); + generateGhashProcessBlocks(masm, state, subkeyH, data, blocks); + masm.bind(labelDone); + } + + private static void generateGhashProcessBlocks(AArch64MacroAssembler masm, + Register state, + Register subkeyH, + Register data, + Register blocks) { + // Bafflingly, GCM uses little-endian for the byte order, but + // big-endian for the bit order. For example, the polynomial 1 is + // represented as the 16-byte string 80 00 00 00 | 12 bytes of 00. 
+ // + // So, we must either reverse the bytes in each word and do + // everything big-endian or reverse the bits in each byte and do + // it little-endian. On AArch64 it's more idiomatic to reverse + // the bits in each byte (we have an instruction, RBIT, to do + // that) and keep the data in little-endian bit order through the + // calculation, bit-reversing the inputs and outputs. + Register vzr = v30; + masm.neon.eorVVV(ASIMDSize.FullReg, vzr, vzr, vzr); // zero register + // The field polynomial + try (AArch64MacroAssembler.ScratchRegister sc = masm.getScratchRegister()) { + Register scratch = sc.getRegister(); + masm.mov(scratch, 0x00000087L); + masm.neon.dupVG(ASIMDSize.FullReg, ElementSize.DoubleWord, v24, scratch); + } + + masm.fldr(128, v0, AArch64Address.createBaseRegisterOnlyAddress(128, state)); + masm.fldr(128, v1, AArch64Address.createBaseRegisterOnlyAddress(128, subkeyH)); + + // Bit-reverse words in state and subkeyH + masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v0, v0); + masm.neon.rbitVV(ASIMDSize.FullReg, v0, v0); + masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v1, v1); + masm.neon.rbitVV(ASIMDSize.FullReg, v1, v1); + + // long-swap subkeyH into v1 + masm.neon.extVVV(ASIMDSize.FullReg, v4, v1, v1, 0x08); + // xor subkeyH into subkeyL (Karatsuba: (A1+A0)) + masm.neon.eorVVV(ASIMDSize.FullReg, v4, v4, v1); + + Label labelGHASHLoop = new Label(); + masm.bind(labelGHASHLoop); + + // Load the data, bit reversing each byte + masm.fldr(128, v2, AArch64Address.createImmediateAddress(128, IMMEDIATE_POST_INDEXED, data, 0x10)); + masm.neon.rbitVV(ASIMDSize.FullReg, v2, v2); + // bit-swapped data ^ bit-swapped state + masm.neon.eorVVV(ASIMDSize.FullReg, v2, v0, v2); + + // Multiply state in v2 by subkey in v1 + ghashMultiply(masm, + /* resultLo */v5, + /* resultHi */v7, + /* a */v1, + /* b */v2, + /* a1XORa0 */v4, + /* temps */v6, + v3, + /* reuse/clobber b */v2); + // Reduce v7:v5 by the field polynomial + ghashReduce(masm, + /* result 
*/v0, + /* lo */v5, + /* hi */v7, + /* p */v24, + vzr, + /* temp */v3); + + masm.sub(32, blocks, blocks, 1); + masm.cbnz(32, blocks, labelGHASHLoop); + + // The bit-reversed result is at this point in v0 + masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v0, v0); + masm.neon.rbitVV(ASIMDSize.FullReg, v0, v0); + masm.fstr(128, v0, AArch64Address.createBaseRegisterOnlyAddress(128, state)); + } + + /** + * Interleaved GHASH processing. Clobbers all vector registers. + */ + private static void ghashProcessBlocksWide(AArch64MacroAssembler masm, + Register state, + Register subkeyH, + Register data, + Register blocks, + int unrolls) { + Register a1XORa0 = v28; + Register hPrime = v29; + Register vzr = v30; + Register p = v31; + masm.neon.eorVVV(ASIMDSize.FullReg, vzr, vzr, vzr); // zero register + + // The field polynomial + try (AArch64MacroAssembler.ScratchRegister sc = masm.getScratchRegister()) { + Register scratch = sc.getRegister(); + masm.mov(scratch, 0x00000087L); + masm.neon.dupVG(ASIMDSize.FullReg, ElementSize.DoubleWord, p, scratch); + } + + masm.fldr(128, v0, AArch64Address.createBaseRegisterOnlyAddress(128, state)); + masm.fldr(128, hPrime, AArch64Address.createBaseRegisterOnlyAddress(128, subkeyH)); + + // Bit-reverse words in state and subkeyH + masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v0, v0); + masm.neon.rbitVV(ASIMDSize.FullReg, v0, v0); + + masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, hPrime, hPrime); + masm.neon.rbitVV(ASIMDSize.FullReg, hPrime, hPrime); + + // Powers of H -> hPrime + + Label labelAlreadyCalculated = new Label(); + Label labelDone = new Label(); + // The first time around we'll have to calculate H**2, H**3, etc. + // Look at the largest power of H in the subkeyH array to see if + // it's already been calculated. 
+ try (AArch64MacroAssembler.ScratchRegister sc1 = masm.getScratchRegister(); + AArch64MacroAssembler.ScratchRegister sc2 = masm.getScratchRegister()) { + Register rscratch1 = sc1.getRegister(); + Register rscratch2 = sc2.getRegister(); + masm.ldp(64, rscratch1, rscratch2, AArch64Address.createImmediateAddress(64, IMMEDIATE_PAIR_SIGNED_SCALED, subkeyH, 16 * (unrolls - 1))); + masm.orr(64, rscratch1, rscratch1, rscratch2); + masm.cbnz(64, rscratch1, labelAlreadyCalculated); + } + + // Start with H in v6 and hPrime + masm.neon.orrVVV(ASIMDSize.FullReg, v6, hPrime, hPrime); + for (int i = 1; i < unrolls; i++) { + // long-swap subkeyH into a1XORa0 + masm.neon.extVVV(ASIMDSize.FullReg, a1XORa0, hPrime, hPrime, 0x08); + // xor subkeyH into subkeyL (Karatsuba:(A1+A0)) + masm.neon.eorVVV(ASIMDSize.FullReg, a1XORa0, a1XORa0, hPrime); + ghashModmul(masm, + /* result */v6, + /* result_lo */v5, + /* result_hi */v4, + /* b */v6, + hPrime, + vzr, + a1XORa0, + p, + /* temps */v1, + v3, + v2); + masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v1, v6); + masm.neon.rbitVV(ASIMDSize.FullReg, v1, v1); + masm.fstr(128, v1, AArch64Address.createImmediateAddress(128, IMMEDIATE_SIGNED_UNSCALED, subkeyH, 16 * i)); + } + masm.jmp(labelDone); + masm.bind(labelAlreadyCalculated); + + // Load the largest power of H we need into v6. + masm.fldr(128, v6, AArch64Address.createImmediateAddress(128, IMMEDIATE_SIGNED_UNSCALED, subkeyH, 16 * (unrolls - 1))); + masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v6, v6); + masm.neon.rbitVV(ASIMDSize.FullReg, v6, v6); + + masm.bind(labelDone); + // Move H ** unrolls into hPrime + masm.neon.orrVVV(ASIMDSize.FullReg, hPrime, v6, v6); + + // hPrime contains (H ** 1, H ** 2, ... H ** unrolls) + // v0 contains the initial state. Clear the others. 
+ for (int i = 1; i < unrolls; i++) { + int ofs = i * REGISTER_STRIDE; + // zero each state register + masm.neon.eorVVV(ASIMDSize.FullReg, offset(v0, ofs), offset(v0, ofs), offset(v0, ofs)); + } + + // long-swap subkeyH into a1XORa0 + masm.neon.extVVV(ASIMDSize.FullReg, a1XORa0, hPrime, hPrime, 0x08); + // xor subkeyH into subkeyL (Karatsuba: (A1+A0)) + masm.neon.eorVVV(ASIMDSize.FullReg, a1XORa0, a1XORa0, hPrime); + + // Load #unrolls blocks of data + for (int ofs = 0; ofs < unrolls * REGISTER_STRIDE; ofs += REGISTER_STRIDE) { + masm.fldr(128, offset(v2, ofs), AArch64Address.createImmediateAddress(128, IMMEDIATE_POST_INDEXED, data, 0x10)); + } + + // Register assignments, replicated across 4 clones, v0 ... v23 + // + // v0: input / output: current state, result of multiply/reduce + // v1: temp + // v2: input: one block of data (the ciphertext) + // also used as a temp once the data has been consumed + // v3: temp + // v4: output: high part of product + // v5: output: low part ... + // v6: unused + // + // Not replicated: + // + // v28: High part of H xor low part of H' + // v29: H' (hash subkey) + // v30: zero + // v31: Reduction polynomial of the Galois field + + // Inner loop. + // Do the whole load/add/multiply/reduce over all our data except + // the last few rows. + Label labelGHASHLoop = new Label(); + masm.bind(labelGHASHLoop); + + // Prefetching doesn't help here. In fact, on Neoverse N1 it's worse. + // prfm(Address(data, 128), PLDL1KEEP); + + // Xor data into current state + for (int ofs = 0; ofs < unrolls * REGISTER_STRIDE; ofs += REGISTER_STRIDE) { + // bit-swapped data ^ bit-swapped state + masm.neon.rbitVV(ASIMDSize.FullReg, offset(v2, ofs), offset(v2, ofs)); + masm.neon.eorVVV(ASIMDSize.FullReg, offset(v2, ofs), offset(v0, ofs), offset(v2, ofs)); + } + + // Generate fully-unrolled multiply-reduce in two stages. 
+ new GHASHMultiplyGenerator(masm, + unrolls, + /* result_lo */v5, + /* result_hi */v4, + /* data */v2, + hPrime, + a1XORa0, + p, + vzr, + /* temps */v1, + v3, + /* reuse b */v2).unroll(); + + // NB: GHASHReduceGenerator also loads the next #unrolls blocks of + // data into v0, v0+ofs, the current state. + new GHASHReduceGenerator(masm, + unrolls, + /* result */v0, + /* lo */v5, + /* hi */v4, + p, + vzr, + data, + /* data */v2, + /* temp */v3, + true).unroll(); + + masm.sub(64, blocks, blocks, unrolls); + masm.compare(64, blocks, unrolls * 2); + masm.branchConditionally(ConditionFlag.GE, labelGHASHLoop); + + // Merge the #unrolls states. Note that the data for the next + // iteration has already been loaded into v4, v4+ofs, etc... + + // First, we multiply/reduce each clone by the appropriate power of H. + for (int i = 0; i < unrolls; i++) { + int ofs = i * REGISTER_STRIDE; + masm.fldr(128, hPrime, AArch64Address.createImmediateAddress(128, IMMEDIATE_SIGNED_UNSCALED, subkeyH, 16 * (unrolls - i - 1))); + + masm.neon.rbitVV(ASIMDSize.FullReg, offset(v2, ofs), offset(v2, ofs)); + // bit-swapped data ^ bit-swapped state + masm.neon.eorVVV(ASIMDSize.FullReg, offset(v2, ofs), offset(v0, ofs), offset(v2, ofs)); + + masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, hPrime, hPrime); + masm.neon.rbitVV(ASIMDSize.FullReg, hPrime, hPrime); + // long-swap subkeyH into a1XORa0 + masm.neon.extVVV(ASIMDSize.FullReg, a1XORa0, hPrime, hPrime, 0x08); + // xor subkeyH into subkeyL (Karatsuba: (A1+A0)) + masm.neon.eorVVV(ASIMDSize.FullReg, a1XORa0, a1XORa0, hPrime); + ghashModmul(masm, + /* result */offset(v0, ofs), + /* resultLo */offset(v5, ofs), + /* resultHi */offset(v4, ofs), + /* b */offset(v2, ofs), + hPrime, + vzr, + a1XORa0, + p, + /* temps */offset(v1, ofs), + offset(v3, ofs), + /* reuse b */offset(v2, ofs)); + } + + // Then we sum the results. 
+ for (int i = 0; i < unrolls - 1; i++) { + int ofs = i * REGISTER_STRIDE; + masm.neon.eorVVV(ASIMDSize.FullReg, v0, v0, offset(v0, ofs + REGISTER_STRIDE)); + } + + masm.sub(64, blocks, blocks, unrolls); + + // And finally bit-reverse the state back to big endian. + masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v0, v0); + masm.neon.rbitVV(ASIMDSize.FullReg, v0, v0); + masm.fstr(128, v0, AArch64Address.createBaseRegisterOnlyAddress(128, state)); + } + + static final class GHASHMultiplyGenerator extends AArch64AESEncryptOp.KernelGenerator { + + private final AArch64MacroAssembler masm; + private final Register resultLo; + private final Register resultHi; + private final Register b; + private final Register a; + private final Register vzr; + private final Register a1XORa0; + private final Register p; + private final Register tmp1; + private final Register tmp2; + private final Register tmp3; + + GHASHMultiplyGenerator(AArch64MacroAssembler masm, + int unrolls, + Register resultLo, + Register resultHi, + Register b, + Register a, + Register a1XORa0, + Register p, + Register vzr, + Register tmp1, + Register tmp2, + Register tmp3) { + super(unrolls); + this.masm = masm; + this.resultLo = resultLo; + this.resultHi = resultHi; + this.b = b; + this.a = a; + this.a1XORa0 = a1XORa0; + this.p = p; + this.vzr = vzr; + this.tmp1 = tmp1; + this.tmp2 = tmp2; + this.tmp3 = tmp3; + } + + @Override + public void generate(int index) { + // Karatsuba multiplication performs a 128*128 -> 256-bit + // multiplication in three 128-bit multiplications and a few + // additions. 
+ // + // (C1:C0) = A1*B1, (D1:D0) = A0*B0, (E1:E0) = (A0+A1)(B0+B1) + // (A1:A0)(B1:B0) = C1:(C0+C1+D1+E1):(D1+C0+D0+E0):D0 + // + // Inputs: + // + // A0 in a.d[0] (subkey) + // A1 in a.d[1] + // (A1+A0) in a1_xor_a0.d[0] + // + // B0 in b.d[0] (state) + // B1 in b.d[1] + + switch (index) { + case 0: + masm.neon.extVVV(ASIMDSize.FullReg, tmp1, b, b, 0x08); + break; + case 1: + masm.neon.pmullVVV(ASIMDSize.FullReg, ElementSize.DoubleWord, resultHi, b, a); // A1*B1 + break; + case 2: + masm.neon.eorVVV(ASIMDSize.FullReg, tmp1, tmp1, b); // (B1+B0) + break; + case 3: + masm.neon.pmullVVV(ASIMDSize.HalfReg, ElementSize.DoubleWord, resultLo, b, a); // A0*B0 + break; + case 4: + masm.neon.pmullVVV(ASIMDSize.HalfReg, ElementSize.DoubleWord, tmp2, tmp1, a1XORa0); // (A1+A0)(B1+B0) + break; + case 5: + masm.neon.extVVV(ASIMDSize.FullReg, tmp1, resultLo, resultHi, 0x08); + break; + case 6: + masm.neon.eorVVV(ASIMDSize.FullReg, tmp3, resultHi, resultLo); // A1*B1+A0*B0 + break; + case 7: + masm.neon.eorVVV(ASIMDSize.FullReg, tmp2, tmp2, tmp1); + break; + case 8: + masm.neon.eorVVV(ASIMDSize.FullReg, tmp2, tmp2, tmp3); + break; + // Register pair holds the result of carry-less multiplication + case 9: + masm.neon.insVV(ElementSize.DoubleWord, resultHi, 0, tmp2, 1); + break; + case 10: + masm.neon.insVV(ElementSize.DoubleWord, resultLo, 1, tmp2, 0); + break; + default: + throw GraalError.shouldNotReachHere(); + } + } + + @Override + public AArch64AESEncryptOp.KernelGenerator next() { + return new GHASHMultiplyGenerator(masm, + unrolls, + offset(resultLo, REGISTER_STRIDE), + offset(resultHi, REGISTER_STRIDE), + offset(b, REGISTER_STRIDE), + a, + a1XORa0, + p, + vzr, + offset(tmp1, REGISTER_STRIDE), + offset(tmp2, REGISTER_STRIDE), + offset(tmp3, REGISTER_STRIDE)); + } + + @Override + public int length() { + return 11; + } + } + + /** + * Reduce the 128-bit product in hi:lo by the GCM field polynomial. 
The Register argument called + * data is optional: if it is a valid register, we interleave LD1 instructions with the + * reduction. This is to reduce latency next time around the loop. + */ + static final class GHASHReduceGenerator extends AArch64AESEncryptOp.KernelGenerator { + + private final AArch64MacroAssembler masm; + private final Register result; + private final Register lo; + private final Register hi; + private final Register p; + private final Register vzr; + private final Register dataPtr; + private final Register data; + private final Register t1; + private final boolean once; + + GHASHReduceGenerator(AArch64MacroAssembler masm, + int unrolls, + Register result, + Register lo, + Register hi, + Register p, + Register vzr, + Register dataPtr, + Register data, + Register t1, + boolean once) { + super(unrolls); + + this.masm = masm; + this.result = result; + this.lo = lo; + this.hi = hi; + this.p = p; + this.vzr = vzr; + this.dataPtr = dataPtr; + this.data = data; + this.t1 = t1; + this.once = once; + } + + @Override + public void generate(int index) { + Register t0 = result; + + switch (index) { + // The GCM field polynomial f is z^128 + p(z), where p = + // z^7+z^2+z+1. + // + // z^128 === -p(z) (mod (z^128 + p(z))) + // + // so, given that the product we're reducing is + // a == lo + hi * z^128 + // substituting, + // === lo - hi * p(z) (mod (z^128 + p(z))) + // + // we reduce by multiplying hi by p(z) and subtracting the _result + // from (i.e. XORing it with) lo. Because p has no nonzero high + // bits we can do this with two 64-bit multiplications, lo*p and + // hi*p. 
+ case 0: + masm.neon.pmullVVV(ASIMDSize.FullReg, ElementSize.DoubleWord, t0, hi, p); + break; + case 1: + masm.neon.extVVV(ASIMDSize.FullReg, t1, t0, vzr, 8); + break; + case 2: + masm.neon.eorVVV(ASIMDSize.FullReg, hi, hi, t1); + break; + case 3: + masm.neon.extVVV(ASIMDSize.FullReg, t1, vzr, t0, 8); + break; + case 4: + masm.neon.eorVVV(ASIMDSize.FullReg, lo, lo, t1); + break; + case 5: + masm.neon.pmullVVV(ASIMDSize.HalfReg, ElementSize.DoubleWord, t0, hi, p); + break; + case 6: + masm.neon.eorVVV(ASIMDSize.FullReg, result, lo, t0); + break; + default: + throw GraalError.shouldNotReachHere(); + } + + // Sprinkle load instructions into the generated instructions + if (!Register.None.equals(data) && once) { + assert length() >= unrolls : "not enough room for interleaved loads"; + if (index < unrolls) { + masm.fldr(128, offset(data, index * REGISTER_STRIDE), + AArch64Address.createImmediateAddress(128, IMMEDIATE_POST_INDEXED, dataPtr, 0x10)); + } + } + } + + @Override + public AArch64AESEncryptOp.KernelGenerator next() { + return new GHASHReduceGenerator(masm, + unrolls, + offset(result, REGISTER_STRIDE), + offset(lo, REGISTER_STRIDE), + offset(hi, REGISTER_STRIDE), + p, + vzr, + dataPtr, + data, + offset(t1, REGISTER_STRIDE), + false); + } + + @Override + public int length() { + return 7; + } + } + + /** + * Perform a GHASH multiply/reduce on a single FloatRegister. 
+ */ + private static void ghashModmul(AArch64MacroAssembler masm, + Register result, + Register resultLo, + Register resultHi, + Register b, + Register a, + Register vzr, + Register a1XORa0, + Register p, + Register t1, + Register t2, + Register t3) { + ghashMultiply(masm, resultLo, resultHi, a, b, a1XORa0, t1, t2, t3); + ghashReduce(masm, result, resultLo, resultHi, p, vzr, t1); + } + + private static void ghashReduce(AArch64MacroAssembler masm, + Register result, + Register lo, + Register hi, + Register p, + Register vzr, + Register t1) { + Register t0 = result; + + // The GCM field polynomial f is z^128 + p(z), where p = + // z^7+z^2+z+1. + // + // z^128 === -p(z) (mod (z^128 + p(z))) + // + // so, given that the product we're reducing is + // a == lo + hi * z^128 + // substituting, + // === lo - hi * p(z) (mod (z^128 + p(z))) + // + // we reduce by multiplying hi by p(z) and subtracting the result + // from (i.e. XORing it with) lo. Because p has no nonzero high + // bits we can do this with two 64-bit multiplications, lo*p and + // hi*p. + + masm.neon.pmullVVV(ASIMDSize.FullReg, ElementSize.DoubleWord, t0, hi, p); + masm.neon.extVVV(ASIMDSize.FullReg, t1, t0, vzr, 8); + masm.neon.eorVVV(ASIMDSize.FullReg, hi, hi, t1); + masm.neon.extVVV(ASIMDSize.FullReg, t1, vzr, t0, 8); + masm.neon.eorVVV(ASIMDSize.FullReg, lo, lo, t1); + masm.neon.pmullVVV(ASIMDSize.HalfReg, ElementSize.DoubleWord, t0, hi, p); + masm.neon.eorVVV(ASIMDSize.FullReg, result, lo, t0); + } + + /** + * + * ghashMultiply and ghashReduce are the non-unrolled versions of the GHASH function generators. + */ + private static void ghashMultiply(AArch64MacroAssembler masm, + Register resultLo, + Register resultHi, + Register a, + Register b, + Register a1XORa0, + Register tmp1, + Register tmp2, + Register tmp3) { + // Karatsuba multiplication performs a 128*128 -> 256-bit + // multiplication in three 128-bit multiplications and a few + // additions. 
+ // + // (C1:C0) = A1*B1, (D1:D0) = A0*B0, (E1:E0) = (A0+A1)(B0+B1) + // (A1:A0)(B1:B0) = C1:(C0+C1+D1+E1):(D1+C0+D0+E0):D0 + // + // Inputs: + // + // A0 in a.d[0] (subkey) + // A1 in a.d[1] + // (A1+A0) in a1_xor_a0.d[0] + // + // B0 in b.d[0] (state) + // B1 in b.d[1] + masm.neon.extVVV(ASIMDSize.FullReg, tmp1, b, b, 0x08); + masm.neon.pmullVVV(ASIMDSize.FullReg, ElementSize.DoubleWord, resultHi, b, a); // A1*B1 + masm.neon.eorVVV(ASIMDSize.FullReg, tmp1, tmp1, b); // (B1+B0) + masm.neon.pmullVVV(ASIMDSize.HalfReg, ElementSize.DoubleWord, resultLo, b, a); // A0*B0 + masm.neon.pmullVVV(ASIMDSize.HalfReg, ElementSize.DoubleWord, tmp2, tmp1, a1XORa0); // (A1+A0)(B1+B0) + + masm.neon.extVVV(ASIMDSize.FullReg, tmp1, resultLo, resultHi, 0x08); + masm.neon.eorVVV(ASIMDSize.FullReg, tmp3, resultHi, resultLo); // A1*B1+A0*B0 + masm.neon.eorVVV(ASIMDSize.FullReg, tmp2, tmp2, tmp1); + masm.neon.eorVVV(ASIMDSize.FullReg, tmp2, tmp2, tmp3); + + // Register pair holds the result of carry-less multiplication + masm.neon.insVV(ElementSize.DoubleWord, resultHi, 0, tmp2, 1); + masm.neon.insVV(ElementSize.DoubleWord, resultLo, 1, tmp2, 0); + } +} diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64ArrayCompareToOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64ArrayCompareToOp.java index 90a46dff8f25..7c96611a5028 100644 --- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64ArrayCompareToOp.java +++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64ArrayCompareToOp.java @@ -268,24 +268,24 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { // and sub the size too masm.sublAndJcc(cnt2, stride2x2, ConditionFlag.NotZero, labelCompareWideVectorsLoopAVX3, true); - masm.vpxor(vec1, vec1, vec1); + masm.vpxor(vec1, vec1, vec1, AVXSize.YMM); masm.jmpb(labelCompareWideTail); } 
masm.bind(labelCompareWideVectorsLoopAVX2); if (strideA == strideB) { masm.vmovdqu(vec1, new AMD64Address(str1, result, maxStride)); - masm.vpxor(vec1, vec1, new AMD64Address(str2, result, maxStride)); + masm.vpxor(vec1, vec1, new AMD64Address(str2, result, maxStride), AVXSize.YMM); } else { masm.vpmovzxbw(vec1, new AMD64Address(str1, result, scale1)); - masm.vpxor(vec1, vec1, new AMD64Address(str2, result, scale2)); + masm.vpxor(vec1, vec1, new AMD64Address(str2, result, scale2), AVXSize.YMM); } - masm.vptest(vec1, vec1); + masm.vptest(vec1, vec1, AVXSize.YMM); masm.jcc(ConditionFlag.NotZero, labelVectorNotEqual); masm.addq(result, elementsPerYMMVector); masm.sublAndJcc(cnt2, elementsPerYMMVector, ConditionFlag.NotZero, labelCompareWideVectorsLoop, false); // clean upper bits of YMM registers - masm.vpxor(vec1, vec1, vec1); + masm.vpxor(vec1, vec1, vec1, AVXSize.YMM); // compare wide vectors tail masm.bind(labelCompareWideTail); @@ -299,7 +299,7 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors. 
masm.bind(labelVectorNotEqual); // clean upper bits of YMM registers - masm.vpxor(vec1, vec1, vec1); + masm.vpxor(vec1, vec1, vec1, AVXSize.YMM); if (strideA == strideB) { masm.leaq(str1, new AMD64Address(str1, result, maxStride)); masm.leaq(str2, new AMD64Address(str2, result, maxStride)); diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64EncodeArrayOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64EncodeArrayOp.java index 0a5416a0b817..18da0d2bcbc8 100644 --- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64EncodeArrayOp.java +++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64EncodeArrayOp.java @@ -36,6 +36,7 @@ import org.graalvm.compiler.asm.Label; import org.graalvm.compiler.asm.amd64.AMD64Address; +import org.graalvm.compiler.asm.amd64.AVXKind; import org.graalvm.compiler.core.common.Stride; import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag; import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler; @@ -161,7 +162,7 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { masm.vmovdqu(vectorTemp3, new AMD64Address(src, len, Stride.S2, -64)); masm.vmovdqu(vectorTemp4, new AMD64Address(src, len, Stride.S2, -32)); masm.emit(VPOR, vectorTemp2, vectorTemp3, vectorTemp4, YMM); - masm.vptest(vectorTemp2, vectorTemp1); + masm.vptest(vectorTemp2, vectorTemp1, AVXKind.AVXSize.YMM); masm.jcc(ConditionFlag.NotZero, labelCopy32CharsExit, true); masm.emit(VPACKUSWB, vectorTemp3, vectorTemp3, vectorTemp4, YMM); masm.emit(VPERMQ, vectorTemp4, vectorTemp3, 0xD8, YMM); @@ -184,7 +185,7 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { if (supportsAVX2AndYMM()) { masm.vmovdqu(vectorTemp2, new AMD64Address(src, len, Stride.S2, -32)); - masm.vptest(vectorTemp2, vectorTemp1); + masm.vptest(vectorTemp2, vectorTemp1, AVXKind.AVXSize.YMM); 
masm.jcc(ConditionFlag.NotZero, labelCopy16CharsExit); masm.emit(VPACKUSWB, vectorTemp2, vectorTemp2, vectorTemp1, YMM); masm.emit(VPERMQ, vectorTemp3, vectorTemp2, 0xD8, YMM); diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64GHASHProcessBlocksOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64GHASHProcessBlocksOp.java new file mode 100644 index 000000000000..9bdf8ae16880 --- /dev/null +++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64GHASHProcessBlocksOp.java @@ -0,0 +1,575 @@ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ +package org.graalvm.compiler.lir.amd64; + +import static jdk.vm.ci.amd64.AMD64.rax; +import static jdk.vm.ci.amd64.AMD64.xmm0; +import static jdk.vm.ci.amd64.AMD64.xmm1; +import static jdk.vm.ci.amd64.AMD64.xmm10; +import static jdk.vm.ci.amd64.AMD64.xmm11; +import static jdk.vm.ci.amd64.AMD64.xmm13; +import static jdk.vm.ci.amd64.AMD64.xmm14; +import static jdk.vm.ci.amd64.AMD64.xmm15; +import static jdk.vm.ci.amd64.AMD64.xmm2; +import static jdk.vm.ci.amd64.AMD64.xmm3; +import static jdk.vm.ci.amd64.AMD64.xmm4; +import static jdk.vm.ci.amd64.AMD64.xmm5; +import static jdk.vm.ci.amd64.AMD64.xmm6; +import static jdk.vm.ci.amd64.AMD64.xmm7; +import static jdk.vm.ci.amd64.AMD64.xmm8; +import static jdk.vm.ci.amd64.AMD64.xmm9; +import static jdk.vm.ci.code.ValueUtil.asRegister; +import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; +import static org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper.pointerConstant; +import static org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper.recordExternalAddress; + +import org.graalvm.compiler.asm.Label; +import org.graalvm.compiler.asm.amd64.AMD64Address; +import org.graalvm.compiler.asm.amd64.AMD64Assembler; +import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag; +import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler; +import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize; +import org.graalvm.compiler.lir.LIRInstructionClass; +import org.graalvm.compiler.lir.StubPort; +import org.graalvm.compiler.lir.asm.ArrayDataPointerConstant; +import org.graalvm.compiler.lir.asm.CompilationResultBuilder; +import org.graalvm.compiler.lir.gen.LIRGeneratorTool; + +import jdk.vm.ci.amd64.AMD64; +import jdk.vm.ci.code.Register; +import jdk.vm.ci.meta.AllocatableValue; +import jdk.vm.ci.meta.Value; + +// @formatter:off +@StubPort(path = "src/hotspot/cpu/x86/stubGenerator_x86_64.cpp", + lineStart = 5281, + lineEnd = 5448, + commit = "27af0144ea57e86d9b81c2b328fad66e4a046f61", + sha1 = 
"dde6c3a58860fe4182bb03861710e6ed5b55cb51") +@StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86_aes.cpp", + lineStart = 490, + lineEnd = 780, + commit = "27af0144ea57e86d9b81c2b328fad66e4a046f61", + sha1 = "2fae9aba4278b89fff3021a5e14450541d21b52f") +// @formatter:on +public final class AMD64GHASHProcessBlocksOp extends AMD64LIRInstruction { + + public static final LIRInstructionClass TYPE = LIRInstructionClass.create(AMD64GHASHProcessBlocksOp.class); + + @Alive({REG}) private Value stateValue; + @Alive({REG}) private Value htblValue; + @Alive({REG}) private Value originalDataValue; + @Alive({REG}) private Value originalBlocksValue; + + @Temp protected Value dataValue; + @Temp protected Value blocksValue; + + @Temp protected Value[] temps; + + public AMD64GHASHProcessBlocksOp(LIRGeneratorTool tool, + AllocatableValue stateValue, + AllocatableValue htblValue, + AllocatableValue originalDataValue, + AllocatableValue originalBlocksValue) { + super(TYPE); + + this.stateValue = stateValue; + this.htblValue = htblValue; + this.originalDataValue = originalDataValue; + this.originalBlocksValue = originalBlocksValue; + + this.dataValue = tool.newVariable(originalDataValue.getValueKind()); + this.blocksValue = tool.newVariable(originalBlocksValue.getValueKind()); + + if (((AMD64) tool.target().arch).getFeatures().contains(AMD64.CPUFeature.AVX)) { + this.temps = new Value[]{ + rax.asValue(), + xmm0.asValue(), + xmm1.asValue(), + xmm2.asValue(), + xmm3.asValue(), + xmm4.asValue(), + xmm5.asValue(), + xmm6.asValue(), + xmm7.asValue(), + xmm8.asValue(), + xmm9.asValue(), + xmm10.asValue(), + xmm11.asValue(), + xmm13.asValue(), + xmm14.asValue(), + xmm15.asValue(), + }; + } else { + this.temps = new Value[]{ + xmm0.asValue(), + xmm1.asValue(), + xmm2.asValue(), + xmm3.asValue(), + xmm4.asValue(), + xmm5.asValue(), + xmm6.asValue(), + xmm7.asValue(), + xmm8.asValue(), + xmm9.asValue(), + xmm10.asValue(), + }; + } + } + + private ArrayDataPointerConstant ghashLongSwapMask = 
pointerConstant(16, new int[]{ + // @formatter:off + 0x0b0a0908, 0x0f0e0d0c, 0x03020100, 0x07060504 + // @formatter:on + }); + + private ArrayDataPointerConstant ghashByteSwapMask = pointerConstant(16, new int[]{ + // @formatter:off + 0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203 + // @formatter:on + }); + + private ArrayDataPointerConstant ghashShuffleMask = pointerConstant(16, new int[]{ + // @formatter:off + 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f + // @formatter:on + }); + + private ArrayDataPointerConstant ghashPolynomial = pointerConstant(16, new int[]{ + // @formatter:off + 0x00000001, 0x00000000, 0x00000000, 0xc2000000 + // @formatter:on + }); + + @Override + public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { + if (masm.supports(AMD64.CPUFeature.AVX)) { + Label labelBeginProcess = new Label(); + Label labelBlock8Reduction = new Label(); + Label labelOneBlkInit = new Label(); + Label labelProcess1Block = new Label(); + Label labelProcess8Blocks = new Label(); + Label labelSaveState = new Label(); + Label labelExitGHASH = new Label(); + + Register inputState = asRegister(stateValue); + Register htbl = asRegister(htblValue); + Register originalData = asRegister(originalDataValue); + Register originalBlocks = asRegister(originalBlocksValue); + + Register inputData = asRegister(dataValue); + Register blocks = asRegister(blocksValue); + + masm.movq(inputData, originalData); + masm.movq(blocks, originalBlocks); + + // temporary variables to hold input data and input state + Register data = xmm1; + Register state = xmm0; + // temporary variables to hold intermediate results + Register tmp0 = xmm3; + Register tmp1 = xmm4; + Register tmp2 = xmm5; + Register tmp3 = xmm6; + // temporary variables to hold byte and long swap masks + Register bswapMask = xmm2; + Register lswapMask = xmm14; + + masm.testqAndJcc(blocks, blocks, ConditionFlag.Zero, labelExitGHASH, false); + + // Check if Hashtable (1*16) has been already generated + // For 
anything less than 8 blocks, we generate only the first power of H. + masm.movdqu(tmp2, new AMD64Address(htbl, 1 * 16)); + masm.vptest(tmp2, tmp2, AVXSize.XMM); + masm.jcc(ConditionFlag.NotZero, labelBeginProcess); + generateHtblOneBlock(crb, masm, htbl); + + masm.bind(labelBeginProcess); + masm.movdqu(lswapMask, recordExternalAddress(crb, ghashLongSwapMask)); + masm.movdqu(state, new AMD64Address(inputState)); + masm.vpshufb(state, state, lswapMask, AVXSize.XMM); + + masm.cmplAndJcc(blocks, 8, ConditionFlag.Below, labelOneBlkInit, false); + // If we have 8 blocks or more data, then generate remaining powers of H + masm.movdqu(tmp2, new AMD64Address(htbl, 8 * 16)); + masm.vptest(tmp2, tmp2, AVXSize.XMM); + masm.jcc(ConditionFlag.NotZero, labelProcess8Blocks); + generateHtblEightBlocks(masm, htbl); + + // Do 8 multiplies followed by a reduction processing 8 blocks of data at a time + // Each block = 16 bytes. + masm.bind(labelProcess8Blocks); + masm.subl(blocks, 8); + masm.movdqu(bswapMask, recordExternalAddress(crb, ghashByteSwapMask)); + masm.movdqu(data, new AMD64Address(inputData, 16 * 7)); + masm.vpshufb(data, data, bswapMask, AVXSize.XMM); + // Loading 1*16 as calculated powers of H required starts at that location. 
+ masm.movdqu(xmm15, new AMD64Address(htbl, 1 * 16)); + // Perform carryless multiplication of (H*2, data block #7) + masm.vpclmulhqlqdq(tmp2, data, xmm15); // a0 * b1 + masm.vpclmullqlqdq(tmp0, data, xmm15); // a0 * b0 + masm.vpclmulhqhqdq(tmp1, data, xmm15); // a1 * b1 + masm.vpclmullqhqdq(tmp3, data, xmm15); // a1 * b0 + masm.vpxor(tmp2, tmp2, tmp3, AVXSize.XMM); // (a0 * b1) + (a1 * b0) + + masm.movdqu(data, new AMD64Address(inputData, 16 * 6)); + masm.vpshufb(data, data, bswapMask, AVXSize.XMM); + // Perform carryless multiplication of (H^2 * 2, data block #6) + schoolbookAAD(masm, 2, htbl, data, tmp0, tmp1, tmp2, tmp3); + + masm.movdqu(data, new AMD64Address(inputData, 16 * 5)); + masm.vpshufb(data, data, bswapMask, AVXSize.XMM); + // Perform carryless multiplication of (H^3 * 2, data block #5) + schoolbookAAD(masm, 3, htbl, data, tmp0, tmp1, tmp2, tmp3); + masm.movdqu(data, new AMD64Address(inputData, 16 * 4)); + masm.vpshufb(data, data, bswapMask, AVXSize.XMM); + // Perform carryless multiplication of (H^4 * 2, data block #4) + schoolbookAAD(masm, 4, htbl, data, tmp0, tmp1, tmp2, tmp3); + masm.movdqu(data, new AMD64Address(inputData, 16 * 3)); + masm.vpshufb(data, data, bswapMask, AVXSize.XMM); + // Perform carryless multiplication of (H^5 * 2, data block #3) + schoolbookAAD(masm, 5, htbl, data, tmp0, tmp1, tmp2, tmp3); + masm.movdqu(data, new AMD64Address(inputData, 16 * 2)); + masm.vpshufb(data, data, bswapMask, AVXSize.XMM); + // Perform carryless multiplication of (H^6 * 2, data block #2) + schoolbookAAD(masm, 6, htbl, data, tmp0, tmp1, tmp2, tmp3); + masm.movdqu(data, new AMD64Address(inputData, 16 * 1)); + masm.vpshufb(data, data, bswapMask, AVXSize.XMM); + // Perform carryless multiplication of (H^7 * 2, data block #1) + schoolbookAAD(masm, 7, htbl, data, tmp0, tmp1, tmp2, tmp3); + masm.movdqu(data, new AMD64Address(inputData, 16 * 0)); + // xor data block#0 with input state before performing carry-less multiplication + masm.vpshufb(data, data, 
bswapMask, AVXSize.XMM); + masm.vpxor(data, data, state, AVXSize.XMM); + // Perform carryless multiplication of (H^8 * 2, data block #0) + schoolbookAAD(masm, 8, htbl, data, tmp0, tmp1, tmp2, tmp3); + masm.vpslldq(tmp3, tmp2, 8, AVXSize.XMM); + masm.vpsrldq(tmp2, tmp2, 8, AVXSize.XMM); + // tmp0, tmp1 contains aggregated results of the multiplication operation + masm.vpxor(tmp0, tmp0, tmp3, AVXSize.XMM); + masm.vpxor(tmp1, tmp1, tmp2, AVXSize.XMM); + + // we have the 2 128-bit partially accumulated multiplication results in tmp0:tmp1 + // with higher 128-bit in tmp1 and lower 128-bit in corresponding tmp0 + // Follows the reduction technique mentioned in + // Shift-XOR reduction described in Gueron-Kounavis May 2010 + masm.bind(labelBlock8Reduction); + // First Phase of the reduction + masm.vpslld(xmm8, tmp0, 31, AVXSize.XMM); // packed right shifting << 31 + masm.vpslld(xmm9, tmp0, 30, AVXSize.XMM); // packed right shifting << 30 + masm.vpslld(xmm10, tmp0, 25, AVXSize.XMM); // packed right shifting << 25 + // xor the shifted versions + masm.vpxor(xmm8, xmm8, xmm10, AVXSize.XMM); + masm.vpxor(xmm8, xmm8, xmm9, AVXSize.XMM); + + masm.vpslldq(xmm9, xmm8, 12, AVXSize.XMM); + masm.vpsrldq(xmm8, xmm8, 4, AVXSize.XMM); + + masm.vpxor(tmp0, tmp0, xmm9, AVXSize.XMM); // first phase of reduction is complete + // second phase of the reduction + masm.vpsrld(xmm9, tmp0, 1, AVXSize.XMM); // packed left shifting >> 1 + masm.vpsrld(xmm10, tmp0, 2, AVXSize.XMM); // packed left shifting >> 2 + masm.vpsrld(tmp2, tmp0, 7, AVXSize.XMM); // packed left shifting >> 7 + // xor the shifted versions + masm.vpxor(xmm9, xmm9, xmm10, AVXSize.XMM); + masm.vpxor(xmm9, xmm9, tmp2, AVXSize.XMM); + masm.vpxor(xmm9, xmm9, xmm8, AVXSize.XMM); + masm.vpxor(tmp0, xmm9, tmp0, AVXSize.XMM); + // Final result is in state + masm.vpxor(state, tmp0, tmp1, AVXSize.XMM); + + masm.leaq(inputData, new AMD64Address(inputData, 16 * 8)); + masm.cmplAndJcc(blocks, 8, AMD64Assembler.ConditionFlag.Below, 
labelOneBlkInit, false); + masm.jmp(labelProcess8Blocks); + + // Since this is one block operation we will only use H * 2 i.e. the first power of H + masm.bind(labelOneBlkInit); + masm.movdqu(tmp0, new AMD64Address(htbl, 1 * 16)); + masm.movdqu(bswapMask, recordExternalAddress(crb, ghashByteSwapMask)); + + // Do one (128 bit x 128 bit) carry-less multiplication at a time followed by a + // reduction. + masm.bind(labelProcess1Block); + masm.cmplAndJcc(blocks, 0, AMD64Assembler.ConditionFlag.Equal, labelSaveState, false); + masm.subl(blocks, 1); + masm.movdqu(data, new AMD64Address(inputData)); + masm.vpshufb(data, data, bswapMask, AVXSize.XMM); + masm.vpxor(state, state, data, AVXSize.XMM); + // gfmul(H*2, state) + gfmul(masm, tmp0, state); + masm.addq(inputData, 16); + masm.jmp(labelProcess1Block); + + masm.bind(labelSaveState); + masm.vpshufb(state, state, lswapMask, AVXSize.XMM); + masm.movdqu(new AMD64Address(inputState), state); + + masm.bind(labelExitGHASH); + // zero out xmm registers used for Htbl storage + masm.vpxor(xmm0, xmm0, xmm0, AVXSize.XMM); + masm.vpxor(xmm1, xmm1, xmm1, AVXSize.XMM); + masm.vpxor(xmm3, xmm3, xmm3, AVXSize.XMM); + masm.vpxor(xmm15, xmm15, xmm15, AVXSize.XMM); + } else { + Label labelGHASHLoop = new Label(); + Label labelExit = new Label(); + + Register state = asRegister(stateValue); + Register subkeyH = asRegister(htblValue); + Register originalData = asRegister(originalDataValue); + Register originalBlocks = asRegister(originalBlocksValue); + + Register data = asRegister(dataValue); + Register blocks = asRegister(blocksValue); + + masm.movq(data, originalData); + masm.movq(blocks, originalBlocks); + + Register xmmTemp0 = xmm0; + Register xmmTemp1 = xmm1; + Register xmmTemp2 = xmm2; + Register xmmTemp3 = xmm3; + Register xmmTemp4 = xmm4; + Register xmmTemp5 = xmm5; + Register xmmTemp6 = xmm6; + Register xmmTemp7 = xmm7; + Register xmmTemp8 = xmm8; + Register xmmTemp9 = xmm9; + Register xmmTemp10 = xmm10; + + masm.movdqu(xmmTemp10, 
recordExternalAddress(crb, ghashLongSwapMask)); + + masm.movdqu(xmmTemp0, new AMD64Address(state)); + masm.pshufb(xmmTemp0, xmmTemp10); + + masm.bind(labelGHASHLoop); + masm.movdqu(xmmTemp2, new AMD64Address(data)); + masm.pshufb(xmmTemp2, recordExternalAddress(crb, ghashByteSwapMask)); + + masm.movdqu(xmmTemp1, new AMD64Address(subkeyH)); + masm.pshufb(xmmTemp1, xmmTemp10); + + masm.pxor(xmmTemp0, xmmTemp2); + + // Multiply with the hash key + masm.movdqu(xmmTemp3, xmmTemp0); + masm.pclmulqdq(xmmTemp3, xmmTemp1, 0); // xmm3 holds a0*b0 + masm.movdqu(xmmTemp4, xmmTemp0); + masm.pclmulqdq(xmmTemp4, xmmTemp1, 16); // xmm4 holds a0*b1 + + masm.movdqu(xmmTemp5, xmmTemp0); + masm.pclmulqdq(xmmTemp5, xmmTemp1, 1); // xmm5 holds a1*b0 + masm.movdqu(xmmTemp6, xmmTemp0); + masm.pclmulqdq(xmmTemp6, xmmTemp1, 17); // xmm6 holds a1*b1 + + masm.pxor(xmmTemp4, xmmTemp5); // xmm4 holds a0*b1 + a1*b0 + + masm.movdqu(xmmTemp5, xmmTemp4); // move the contents of xmm4 to xmm5 + masm.psrldq(xmmTemp4, 8); // shift by xmm4 64 bits to the right + masm.pslldq(xmmTemp5, 8); // shift by xmm5 64 bits to the left + // Register pair holds the result of the carry-less multiplication of xmm0 + // by xmm1. + masm.pxor(xmmTemp3, xmmTemp5); + masm.pxor(xmmTemp6, xmmTemp4); + + // We shift the result of the multiplication by one bit position + // to the left to cope for the fact that the bits are reversed. + masm.movdqu(xmmTemp7, xmmTemp3); + masm.movdqu(xmmTemp8, xmmTemp6); + masm.pslld(xmmTemp3, 1); + masm.pslld(xmmTemp6, 1); + masm.psrld(xmmTemp7, 31); + masm.psrld(xmmTemp8, 31); + masm.movdqu(xmmTemp9, xmmTemp7); + masm.pslldq(xmmTemp8, 4); + masm.pslldq(xmmTemp7, 4); + masm.psrldq(xmmTemp9, 12); + masm.por(xmmTemp3, xmmTemp7); + masm.por(xmmTemp6, xmmTemp8); + masm.por(xmmTemp6, xmmTemp9); + + // + // First phase of the reduction + // + // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts + // independently. 
+ masm.movdqu(xmmTemp7, xmmTemp3); + masm.movdqu(xmmTemp8, xmmTemp3); + masm.movdqu(xmmTemp9, xmmTemp3); + masm.pslld(xmmTemp7, 31); // packed left shift << 31 + masm.pslld(xmmTemp8, 30); // packed left shift << 30 + masm.pslld(xmmTemp9, 25); // packed left shift << 25 + masm.pxor(xmmTemp7, xmmTemp8); // xor the shifted versions + masm.pxor(xmmTemp7, xmmTemp9); + masm.movdqu(xmmTemp8, xmmTemp7); + masm.pslldq(xmmTemp7, 12); + masm.psrldq(xmmTemp8, 4); + masm.pxor(xmmTemp3, xmmTemp7); // first phase of the reduction complete + + // + // Second phase of the reduction + // + // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these + // shift operations. + masm.movdqu(xmmTemp2, xmmTemp3); + masm.movdqu(xmmTemp4, xmmTemp3); + masm.movdqu(xmmTemp5, xmmTemp3); + masm.psrld(xmmTemp2, 1); // packed right shift >> 1 + masm.psrld(xmmTemp4, 2); // packed right shift >> 2 + masm.psrld(xmmTemp5, 7); // packed right shift >> 7 + masm.pxor(xmmTemp2, xmmTemp4); // xor the shifted versions + masm.pxor(xmmTemp2, xmmTemp5); + masm.pxor(xmmTemp2, xmmTemp8); + masm.pxor(xmmTemp3, xmmTemp2); + masm.pxor(xmmTemp6, xmmTemp3); // the result is in xmm6 + + masm.decqAndJcc(blocks, AMD64Assembler.ConditionFlag.Zero, labelExit, false); + masm.movdqu(xmmTemp0, xmmTemp6); + masm.addq(data, 16); + masm.jmp(labelGHASHLoop); + + masm.bind(labelExit); + masm.pshufb(xmmTemp6, xmmTemp10); // Byte swap 16-byte result + masm.movdqu(new AMD64Address(state), xmmTemp6); // store the result + } + } + + /** + * Multiply 128 x 128 bits, using 4 pclmulqdq operations.
+ */ + private static void schoolbookAAD(AMD64MacroAssembler masm, int i, Register htbl, Register data, Register tmp0, Register tmp1, Register tmp2, Register tmp3) { + masm.movdqu(xmm15, new AMD64Address(htbl, i * 16)); + masm.vpclmulhqlqdq(tmp3, data, xmm15); // 0x01 + masm.vpxor(tmp2, tmp2, tmp3, AVXSize.XMM); + masm.vpclmullqlqdq(tmp3, data, xmm15); // 0x00 + masm.vpxor(tmp0, tmp0, tmp3, AVXSize.XMM); + masm.vpclmulhqhqdq(tmp3, data, xmm15); // 0x11 + masm.vpxor(tmp1, tmp1, tmp3, AVXSize.XMM); + masm.vpclmullqhqdq(tmp3, data, xmm15); // 0x10 + masm.vpxor(tmp2, tmp2, tmp3, AVXSize.XMM); + } + + /** + * Multiply two 128 bit numbers resulting in a 256 bit value Result of the multiplication + * followed by reduction stored in state. + */ + private static void gfmul(AMD64MacroAssembler masm, Register tmp0, Register state) { + Register tmp1 = xmm4; + Register tmp2 = xmm5; + Register tmp3 = xmm6; + Register tmp4 = xmm7; + + masm.vpclmullqlqdq(tmp1, state, tmp0); // 0x00 (a0 * b0) + masm.vpclmulhqhqdq(tmp4, state, tmp0); // 0x11 (a1 * b1) + masm.vpclmullqhqdq(tmp2, state, tmp0); // 0x10 (a1 * b0) + masm.vpclmulhqlqdq(tmp3, state, tmp0); // 0x01 (a0 * b1) + + masm.vpxor(tmp2, tmp2, tmp3, AVXSize.XMM); // (a0 * b1) + (a1 * b0) + + masm.vpslldq(tmp3, tmp2, 8, AVXSize.XMM); + masm.vpsrldq(tmp2, tmp2, 8, AVXSize.XMM); + masm.vpxor(tmp1, tmp1, tmp3, AVXSize.XMM); // tmp1 and tmp4 hold the result + masm.vpxor(tmp4, tmp4, tmp2, AVXSize.XMM); // of carryless multiplication + // Follows the reduction technique mentioned in + // Shift-XOR reduction described in Gueron-Kounavis May 2010 + + // First phase of reduction + masm.vpslld(xmm8, tmp1, 31, AVXSize.XMM); // packed right shift shifting << 31 + masm.vpslld(xmm9, tmp1, 30, AVXSize.XMM); // packed right shift shifting << 30 + masm.vpslld(xmm10, tmp1, 25, AVXSize.XMM); // packed right shift shifting << 25 + // xor the shifted versions + masm.vpxor(xmm8, xmm8, xmm9, AVXSize.XMM); + masm.vpxor(xmm8, xmm8, xmm10, AVXSize.XMM); + 
masm.vpslldq(xmm9, xmm8, 12, AVXSize.XMM); + masm.vpsrldq(xmm8, xmm8, 4, AVXSize.XMM); + masm.vpxor(tmp1, tmp1, xmm9, AVXSize.XMM); // first phase of the reduction complete + + // Second phase of the reduction + masm.vpsrld(xmm9, tmp1, 1, AVXSize.XMM); // packed left shifting >> 1 + masm.vpsrld(xmm10, tmp1, 2, AVXSize.XMM); // packed left shifting >> 2 + masm.vpsrld(xmm11, tmp1, 7, AVXSize.XMM); // packed left shifting >> 7 + masm.vpxor(xmm9, xmm9, xmm10, AVXSize.XMM); // xor the shifted versions + masm.vpxor(xmm9, xmm9, xmm11, AVXSize.XMM); + masm.vpxor(xmm9, xmm9, xmm8, AVXSize.XMM); + masm.vpxor(tmp1, tmp1, xmm9, AVXSize.XMM); + masm.vpxor(state, tmp4, tmp1, AVXSize.XMM); // the result is in state + } + + private void generateHtblOneBlock(CompilationResultBuilder crb, AMD64MacroAssembler masm, Register htbl) { + Register t = xmm13; + + // load the original subkey hash + masm.movdqu(t, new AMD64Address(htbl)); + // shuffle using long swap mask + masm.movdqu(xmm10, recordExternalAddress(crb, ghashLongSwapMask)); + masm.vpshufb(t, t, xmm10, AVXSize.XMM); + + // Compute H' = GFMUL(H, 2) + masm.vpsrld(xmm3, t, 7, AVXSize.XMM); + masm.movdqu(xmm4, recordExternalAddress(crb, ghashShuffleMask)); + masm.vpshufb(xmm3, xmm3, xmm4, AVXSize.XMM); + masm.movl(rax, 0xff00); + masm.movdl(xmm4, rax); + masm.vpshufb(xmm4, xmm4, xmm3, AVXSize.XMM); + masm.movdqu(xmm5, recordExternalAddress(crb, ghashPolynomial)); + masm.vpand(xmm5, xmm5, xmm4, AVXSize.XMM); + masm.vpsrld(xmm3, t, 31, AVXSize.XMM); + masm.vpslld(xmm4, t, 1, AVXSize.XMM); + masm.vpslldq(xmm3, xmm3, 4, AVXSize.XMM); + masm.vpxor(t, xmm4, xmm3, AVXSize.XMM); // t holds p(x) <<1 or H * 2 + + // Adding p(x)<<1 to xmm5 which holds the reduction polynomial + masm.vpxor(t, t, xmm5, AVXSize.XMM); + masm.movdqu(new AMD64Address(htbl, 1 * 16), t); // H * 2 + } + + /** + * This method takes the subkey after expansion as input and generates the remaining powers of + * subkey H. 
The powers of H are used in the reduction process for eight-block GHASH. + */ + private static void generateHtblEightBlocks(AMD64MacroAssembler masm, Register htbl) { + Register t = xmm13; + Register tmp0 = xmm1; + + masm.movdqu(t, new AMD64Address(htbl, 1 * 16)); + masm.movdqu(tmp0, t); + + // tmp0 and t hold H. Now we compute powers of H by using GFMUL(H, H) + gfmul(masm, tmp0, t); + masm.movdqu(new AMD64Address(htbl, 2 * 16), t); // H ^ 2 * 2 + gfmul(masm, tmp0, t); + masm.movdqu(new AMD64Address(htbl, 3 * 16), t); // H ^ 3 * 2 + gfmul(masm, tmp0, t); + masm.movdqu(new AMD64Address(htbl, 4 * 16), t); // H ^ 4 * 2 + gfmul(masm, tmp0, t); + masm.movdqu(new AMD64Address(htbl, 5 * 16), t); // H ^ 5 * 2 + gfmul(masm, tmp0, t); + masm.movdqu(new AMD64Address(htbl, 6 * 16), t); // H ^ 6 * 2 + gfmul(masm, tmp0, t); + masm.movdqu(new AMD64Address(htbl, 7 * 16), t); // H ^ 7 * 2 + gfmul(masm, tmp0, t); + masm.movdqu(new AMD64Address(htbl, 8 * 16), t); // H ^ 8 * 2 + } +} diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64HasNegativesOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64HasNegativesOp.java index 1c15e233c0bb..c076b45d275f 100644 --- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64HasNegativesOp.java +++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64HasNegativesOp.java @@ -37,6 +37,7 @@ import org.graalvm.compiler.asm.Label; import org.graalvm.compiler.asm.amd64.AMD64Address; +import org.graalvm.compiler.asm.amd64.AVXKind; import org.graalvm.compiler.core.common.Stride; import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag; import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler; @@ -183,14 +184,14 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { masm.bind(labelCompareWideVectors); masm.vmovdqu(vec1, new AMD64Address(ary1, len, Stride.S1)); -
masm.vptest(vec1, vec2); + masm.vptest(vec1, vec2, AVXKind.AVXSize.YMM); masm.jcc(ConditionFlag.NotZero, labelTrue); masm.addqAndJcc(len, 32, ConditionFlag.NotZero, labelCompareWideVectors, false); masm.testlAndJcc(result, result, ConditionFlag.Zero, labelFalse, false); masm.vmovdqu(vec1, new AMD64Address(ary1, result, Stride.S1, -32)); - masm.vptest(vec1, vec2); + masm.vptest(vec1, vec2, AVXKind.AVXSize.YMM); masm.jccb(ConditionFlag.NotZero, labelTrue); masm.jmp(labelFalse); diff --git a/compiler/src/org.graalvm.compiler.lir/src/org/graalvm/compiler/lir/gen/LIRGeneratorTool.java b/compiler/src/org.graalvm.compiler.lir/src/org/graalvm/compiler/lir/gen/LIRGeneratorTool.java index 5a73c532fcdc..9973a5526285 100644 --- a/compiler/src/org.graalvm.compiler.lir/src/org/graalvm/compiler/lir/gen/LIRGeneratorTool.java +++ b/compiler/src/org.graalvm.compiler.lir/src/org/graalvm/compiler/lir/gen/LIRGeneratorTool.java @@ -326,6 +326,11 @@ default void emitAESDecrypt(Value from, Value to, Value key) { throw GraalError.unimplemented("No specialized implementation available"); } + @SuppressWarnings("unused") + default void emitGHASHProcessBlocks(Value state, Value hashSubkey, Value data, Value blocks) { + throw GraalError.unimplemented("No specialized implementation available"); + } + void emitBlackhole(Value operand); LIRKind getLIRKind(Stamp stamp); diff --git a/compiler/src/org.graalvm.compiler.replacements.aarch64/src/org/graalvm/compiler/replacements/aarch64/AArch64GraphBuilderPlugins.java b/compiler/src/org.graalvm.compiler.replacements.aarch64/src/org/graalvm/compiler/replacements/aarch64/AArch64GraphBuilderPlugins.java index b7a9e6dc9b4f..bcd35f56d434 100644 --- a/compiler/src/org.graalvm.compiler.replacements.aarch64/src/org/graalvm/compiler/replacements/aarch64/AArch64GraphBuilderPlugins.java +++ b/compiler/src/org.graalvm.compiler.replacements.aarch64/src/org/graalvm/compiler/replacements/aarch64/AArch64GraphBuilderPlugins.java @@ -71,6 +71,7 @@ import 
org.graalvm.compiler.replacements.SnippetSubstitutionInvocationPlugin; import org.graalvm.compiler.replacements.SnippetTemplate; import org.graalvm.compiler.replacements.StandardGraphBuilderPlugins.AESCryptPlugin; +import org.graalvm.compiler.replacements.StandardGraphBuilderPlugins.GHASHPlugin; import org.graalvm.compiler.replacements.StandardGraphBuilderPlugins.StringLatin1IndexOfCharPlugin; import org.graalvm.compiler.replacements.StringLatin1InflateNode; import org.graalvm.compiler.replacements.StringLatin1Snippets; @@ -115,6 +116,7 @@ public void run() { } registerStringCodingPlugins(invocationPlugins, replacements); registerAESPlugins(invocationPlugins, replacements, arch); + registerGHASHPlugin(invocationPlugins, replacements, arch); } }); } @@ -559,9 +561,22 @@ private static boolean supports(AArch64 arch, CPUFeature... features) { return true; } + public static boolean supportsAESPlugins(AArch64 arch) { + return supports(arch, CPUFeature.AES); + } + private static void registerAESPlugins(InvocationPlugins plugins, Replacements replacements, AArch64 arch) { Registration r = new Registration(plugins, "com.sun.crypto.provider.AESCrypt", replacements); - r.registerConditional(supports(arch, CPUFeature.AES), new AESCryptPlugin(ENCRYPT)); - r.registerConditional(supports(arch, CPUFeature.AES), new AESCryptPlugin(DECRYPT)); + r.registerConditional(supportsAESPlugins(arch), new AESCryptPlugin(ENCRYPT)); + r.registerConditional(supportsAESPlugins(arch), new AESCryptPlugin(DECRYPT)); + } + + public static boolean supportsGHASHPlugins(AArch64 arch) { + return supports(arch, CPUFeature.PMULL); + } + + private static void registerGHASHPlugin(InvocationPlugins plugins, Replacements replacements, AArch64 arch) { + Registration r = new Registration(plugins, "com.sun.crypto.provider.GHASH", replacements); + r.registerConditional(supportsGHASHPlugins(arch), new GHASHPlugin()); } } diff --git 
a/compiler/src/org.graalvm.compiler.replacements.amd64/src/org/graalvm/compiler/replacements/amd64/AMD64GraphBuilderPlugins.java b/compiler/src/org.graalvm.compiler.replacements.amd64/src/org/graalvm/compiler/replacements/amd64/AMD64GraphBuilderPlugins.java index a5b0ee7bf590..61e057a1190b 100644 --- a/compiler/src/org.graalvm.compiler.replacements.amd64/src/org/graalvm/compiler/replacements/amd64/AMD64GraphBuilderPlugins.java +++ b/compiler/src/org.graalvm.compiler.replacements.amd64/src/org/graalvm/compiler/replacements/amd64/AMD64GraphBuilderPlugins.java @@ -74,8 +74,9 @@ import org.graalvm.compiler.replacements.InvocationPluginHelper; import org.graalvm.compiler.replacements.SnippetSubstitutionInvocationPlugin; import org.graalvm.compiler.replacements.SnippetTemplate; -import org.graalvm.compiler.replacements.StandardGraphBuilderPlugins; import org.graalvm.compiler.replacements.StandardGraphBuilderPlugins.AESCryptPlugin; +import org.graalvm.compiler.replacements.StandardGraphBuilderPlugins.ArrayEqualsInvocationPlugin; +import org.graalvm.compiler.replacements.StandardGraphBuilderPlugins.GHASHPlugin; import org.graalvm.compiler.replacements.StandardGraphBuilderPlugins.StringLatin1IndexOfCharPlugin; import org.graalvm.compiler.replacements.StringLatin1InflateNode; import org.graalvm.compiler.replacements.StringLatin1Snippets; @@ -124,6 +125,7 @@ public void run() { registerArraysEqualsPlugins(invocationPlugins, replacements); registerStringCodingPlugins(invocationPlugins, replacements); registerAESPlugins(invocationPlugins, replacements, arch); + registerGHASHPlugin(invocationPlugins, replacements, arch); } }); } @@ -539,8 +541,8 @@ public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Rec private static void registerArraysEqualsPlugins(InvocationPlugins plugins, Replacements replacements) { Registration r = new Registration(plugins, Arrays.class, replacements); - r.register(new 
StandardGraphBuilderPlugins.ArrayEqualsInvocationPlugin(JavaKind.Float, float[].class, float[].class)); - r.register(new StandardGraphBuilderPlugins.ArrayEqualsInvocationPlugin(JavaKind.Double, double[].class, double[].class)); + r.register(new ArrayEqualsInvocationPlugin(JavaKind.Float, float[].class, float[].class)); + r.register(new ArrayEqualsInvocationPlugin(JavaKind.Double, double[].class, double[].class)); } private static void registerStringCodingPlugins(InvocationPlugins plugins, Replacements replacements) { @@ -634,9 +636,22 @@ private static boolean supports(AMD64 arch, CPUFeature... features) { return true; } + public static boolean supportsAESPlugins(AMD64 arch) { + return supports(arch, CPUFeature.AVX, CPUFeature.AES); + } + private static void registerAESPlugins(InvocationPlugins plugins, Replacements replacements, AMD64 arch) { Registration r = new Registration(plugins, "com.sun.crypto.provider.AESCrypt", replacements); - r.registerConditional(supports(arch, CPUFeature.AVX, CPUFeature.AES), new AESCryptPlugin(ENCRYPT)); - r.registerConditional(supports(arch, CPUFeature.AVX, CPUFeature.AES), new AESCryptPlugin(DECRYPT)); + r.registerConditional(supportsAESPlugins(arch), new AESCryptPlugin(ENCRYPT)); + r.registerConditional(supportsAESPlugins(arch), new AESCryptPlugin(DECRYPT)); + } + + public static boolean supportsGHASHPlugins(AMD64 arch) { + return supports(arch, CPUFeature.SSSE3, CPUFeature.CLMUL); + } + + private static void registerGHASHPlugin(InvocationPlugins plugins, Replacements replacements, AMD64 arch) { + Registration r = new Registration(plugins, "com.sun.crypto.provider.GHASH", replacements); + r.registerConditional(supportsGHASHPlugins(arch), new GHASHPlugin()); } } diff --git a/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/StandardGraphBuilderPlugins.java b/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/StandardGraphBuilderPlugins.java index 
42359a29eaf2..ec554e9a22b7 100644 --- a/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/StandardGraphBuilderPlugins.java +++ b/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/StandardGraphBuilderPlugins.java @@ -163,6 +163,7 @@ import org.graalvm.compiler.replacements.nodes.AESNode.CryptMode; import org.graalvm.compiler.replacements.nodes.ArrayEqualsNode; import org.graalvm.compiler.replacements.nodes.ArrayIndexOfNode; +import org.graalvm.compiler.replacements.nodes.GHASHProcessBlocksNode; import org.graalvm.compiler.replacements.nodes.LogNode; import org.graalvm.compiler.replacements.nodes.MacroNode.MacroParams; import org.graalvm.compiler.replacements.nodes.ProfileBooleanNode; @@ -2086,4 +2087,23 @@ public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Rec return true; } } + + public static class GHASHPlugin extends InvocationPlugin { + + public GHASHPlugin() { + super("processBlocks", byte[].class, int.class, int.class, long[].class, long[].class); + } + + @Override + public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Receiver receiver, + ValueNode data, ValueNode inOffset, ValueNode blocks, ValueNode state, ValueNode hashSubkey) { + try (InvocationPluginHelper helper = new InvocationPluginHelper(b, targetMethod)) { + ValueNode dataAddress = helper.arrayElementPointer(data, JavaKind.Byte, inOffset); + ValueNode stateAddress = helper.arrayStart(state, JavaKind.Long); + ValueNode hashSubkeyAddress = helper.arrayStart(hashSubkey, JavaKind.Long); + b.add(new GHASHProcessBlocksNode(stateAddress, hashSubkeyAddress, dataAddress, blocks)); + return true; + } + } + } } diff --git a/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/CryptoForeignCalls.java b/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/CryptoForeignCalls.java index 8f2b4f56f0c3..63d1d78f45d5 100644 
--- a/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/CryptoForeignCalls.java +++ b/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/CryptoForeignCalls.java @@ -25,24 +25,22 @@ package org.graalvm.compiler.replacements.nodes; import org.graalvm.compiler.core.common.spi.ForeignCallDescriptor; -import org.graalvm.compiler.nodes.NamedLocationIdentity; import org.graalvm.word.LocationIdentity; import org.graalvm.word.Pointer; -import jdk.vm.ci.meta.JavaKind; - public class CryptoForeignCalls { - public static final ForeignCallDescriptor STUB_AES_ENCRYPT = foreignCallDescriptor("aesEncrypt", Pointer.class, Pointer.class, Pointer.class); - public static final ForeignCallDescriptor STUB_AES_DECRYPT = foreignCallDescriptor("aesDecrypt", Pointer.class, Pointer.class, Pointer.class); + public static final ForeignCallDescriptor STUB_AES_ENCRYPT = foreignCallDescriptor("aesEncrypt", AESNode.KILLED_LOCATIONS, Pointer.class, Pointer.class, Pointer.class); + public static final ForeignCallDescriptor STUB_AES_DECRYPT = foreignCallDescriptor("aesDecrypt", AESNode.KILLED_LOCATIONS, Pointer.class, Pointer.class, Pointer.class); - public static final LocationIdentity[] KILLED_LOCATIONS = {NamedLocationIdentity.getArrayLocation(JavaKind.Byte)}; + public static final ForeignCallDescriptor STUB_GHASH_PROCESS_BLOCKS = foreignCallDescriptor("ghashProcessBlocks", GHASHProcessBlocksNode.KILLED_LOCATIONS, + Pointer.class, Pointer.class, Pointer.class, Pointer.class); - public static final ForeignCallDescriptor[] STUBS = { + public static final ForeignCallDescriptor[] AES_STUBS = { STUB_AES_ENCRYPT, STUB_AES_DECRYPT}; - private static ForeignCallDescriptor foreignCallDescriptor(String name, Class... 
argTypes) { - return new ForeignCallDescriptor(name, void.class, argTypes, false, KILLED_LOCATIONS, false, false); + private static ForeignCallDescriptor foreignCallDescriptor(String name, LocationIdentity[] killLocations, Class... argTypes) { + return new ForeignCallDescriptor(name, void.class, argTypes, false, killLocations, false, false); } } diff --git a/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/GHASHProcessBlocksNode.java b/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/GHASHProcessBlocksNode.java new file mode 100644 index 000000000000..729a9f0b5b82 --- /dev/null +++ b/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/GHASHProcessBlocksNode.java @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package org.graalvm.compiler.replacements.nodes; + +import java.util.EnumSet; + +import org.graalvm.compiler.core.common.spi.ForeignCallDescriptor; +import org.graalvm.compiler.core.common.type.StampFactory; +import org.graalvm.compiler.graph.NodeClass; +import org.graalvm.compiler.lir.GenerateStub; +import org.graalvm.compiler.nodeinfo.InputType; +import org.graalvm.compiler.nodeinfo.NodeCycles; +import org.graalvm.compiler.nodeinfo.NodeInfo; +import org.graalvm.compiler.nodeinfo.NodeSize; +import org.graalvm.compiler.nodes.NamedLocationIdentity; +import org.graalvm.compiler.nodes.ValueNode; +import org.graalvm.compiler.nodes.spi.NodeLIRBuilderTool; +import org.graalvm.word.LocationIdentity; +import org.graalvm.word.Pointer; + +import jdk.vm.ci.meta.JavaKind; + +@NodeInfo(allowedUsageTypes = {InputType.Memory}, cycles = NodeCycles.CYCLES_128, size = NodeSize.SIZE_128) +public class GHASHProcessBlocksNode extends MemoryKillStubIntrinsicNode { + + public static final NodeClass<GHASHProcessBlocksNode> TYPE = NodeClass.create(GHASHProcessBlocksNode.class); + public static final LocationIdentity[] KILLED_LOCATIONS = {NamedLocationIdentity.getArrayLocation(JavaKind.Long)}; + + @Input protected ValueNode state; + @Input protected ValueNode hashSubkey; + @Input protected ValueNode data; + @Input protected ValueNode blocks; + + public GHASHProcessBlocksNode(ValueNode state, ValueNode hashSubkey, ValueNode data, ValueNode blocks) { + this(state, + hashSubkey, + data, + blocks, + null); + } + + public GHASHProcessBlocksNode(ValueNode state, ValueNode hashSubkey, ValueNode data, ValueNode blocks, EnumSet<?> runtimeCheckedCPUFeatures) { + super(TYPE, StampFactory.forVoid(), runtimeCheckedCPUFeatures, LocationIdentity.any()); + this.state = state; + this.hashSubkey = hashSubkey; + this.data = data; + this.blocks = blocks; + } + + 
@Override + public ValueNode[] getForeignCallArguments() { + return new ValueNode[]{state, hashSubkey, data, blocks}; + } + + @Override + public LocationIdentity[] getKilledLocationIdentities() { + return KILLED_LOCATIONS; + } + + @NodeIntrinsic + @GenerateStub(name = "ghashProcessBlocks") + public static native void apply(Pointer state, + Pointer hashSubkey, + Pointer data, + Pointer blocks); + + @NodeIntrinsic + public static native void apply(Pointer state, + Pointer hashSubkey, + Pointer data, + Pointer blocks, + @ConstantNodeParameter EnumSet runtimeCheckedCPUFeatures); + + @Override + public ForeignCallDescriptor getForeignCallDescriptor() { + return CryptoForeignCalls.STUB_GHASH_PROCESS_BLOCKS; + } + + @Override + public void emitIntrinsic(NodeLIRBuilderTool gen) { + gen.getLIRGeneratorTool().emitGHASHProcessBlocks(gen.operand(state), gen.operand(hashSubkey), gen.operand(data), gen.operand(blocks)); + } +} diff --git a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/cpufeature/Stubs.java b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/cpufeature/Stubs.java index 8dec802fe2f8..c4ad53a41334 100644 --- a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/cpufeature/Stubs.java +++ b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/cpufeature/Stubs.java @@ -27,6 +27,7 @@ import static jdk.vm.ci.amd64.AMD64.CPUFeature.AES; import static jdk.vm.ci.amd64.AMD64.CPUFeature.AVX; import static jdk.vm.ci.amd64.AMD64.CPUFeature.AVX2; +import static jdk.vm.ci.amd64.AMD64.CPUFeature.CLMUL; import static jdk.vm.ci.amd64.AMD64.CPUFeature.POPCNT; import static jdk.vm.ci.amd64.AMD64.CPUFeature.SSE3; import static jdk.vm.ci.amd64.AMD64.CPUFeature.SSE4_1; @@ -39,6 +40,7 @@ import org.graalvm.compiler.debug.GraalError; import org.graalvm.compiler.nodes.ValueNode; import org.graalvm.compiler.replacements.nodes.AESNode; +import org.graalvm.compiler.replacements.nodes.GHASHProcessBlocksNode; import org.graalvm.nativeimage.ImageSingletons; 
import org.graalvm.nativeimage.Platform; import org.graalvm.nativeimage.Platforms; @@ -62,11 +64,15 @@ public static class AMD64Features { AVX, AVX2); public static final EnumSet AES_CPU_FEATURES_AMD64 = EnumSet.of(AVX, AES); + public static final EnumSet GHASH_CPU_FEATURES_AMD64 = EnumSet.of(AVX, CLMUL); public static EnumSet getRequiredCPUFeatures(Class klass) { if (AESNode.class.equals(klass)) { return AES_CPU_FEATURES_AMD64; } + if (GHASHProcessBlocksNode.class.equals(klass)) { + return GHASH_CPU_FEATURES_AMD64; + } return RUNTIME_CHECKED_CPU_FEATURES_AMD64; } } @@ -75,11 +81,15 @@ public static EnumSet getRequiredCPUFeatures(Class EMPTY_CPU_FEATURES_AARCH64 = EnumSet.noneOf(AArch64.CPUFeature.class); public static final EnumSet AES_CPU_FEATURES_AARCH64 = EnumSet.of(AArch64.CPUFeature.AES); + public static final EnumSet GHASH_CPU_FEATURES_AARCH64 = EnumSet.of(AArch64.CPUFeature.PMULL); public static EnumSet getRequiredCPUFeatures(Class klass) { if (AESNode.class.equals(klass)) { return AES_CPU_FEATURES_AARCH64; } + if (GHASHProcessBlocksNode.class.equals(klass)) { + return GHASH_CPU_FEATURES_AARCH64; + } return EMPTY_CPU_FEATURES_AARCH64; } } diff --git a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AARCH64StubForeignCallsFeature.java b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AARCH64StubForeignCallsFeature.java index 7fcd02949eb9..0539d678eaad 100644 --- a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AARCH64StubForeignCallsFeature.java +++ b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AARCH64StubForeignCallsFeature.java @@ -26,6 +26,7 @@ import static com.oracle.svm.core.cpufeature.Stubs.AArch64Features.AES_CPU_FEATURES_AARCH64; import static com.oracle.svm.core.cpufeature.Stubs.AArch64Features.EMPTY_CPU_FEATURES_AARCH64; +import static com.oracle.svm.core.cpufeature.Stubs.AArch64Features.GHASH_CPU_FEATURES_AARCH64; import 
org.graalvm.compiler.replacements.nodes.ArrayIndexOfForeignCalls; import org.graalvm.compiler.replacements.nodes.CryptoForeignCalls; @@ -41,7 +42,8 @@ public class AARCH64StubForeignCallsFeature extends StubForeignCallsFeatureBase public AARCH64StubForeignCallsFeature() { super(new StubDescriptor[]{ new StubDescriptor(ArrayIndexOfForeignCalls.STUBS_AARCH64, true, EMPTY_CPU_FEATURES_AARCH64, EMPTY_CPU_FEATURES_AARCH64), - new StubDescriptor(CryptoForeignCalls.STUBS, false, AES_CPU_FEATURES_AARCH64, AES_CPU_FEATURES_AARCH64), + new StubDescriptor(CryptoForeignCalls.AES_STUBS, false, AES_CPU_FEATURES_AARCH64, AES_CPU_FEATURES_AARCH64), + new StubDescriptor(CryptoForeignCalls.STUB_GHASH_PROCESS_BLOCKS, false, GHASH_CPU_FEATURES_AARCH64, GHASH_CPU_FEATURES_AARCH64), }); } } diff --git a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AMD64StubForeignCallsFeature.java b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AMD64StubForeignCallsFeature.java index de21310f1432..23e65eb5b7b7 100644 --- a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AMD64StubForeignCallsFeature.java +++ b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AMD64StubForeignCallsFeature.java @@ -25,8 +25,11 @@ package com.oracle.svm.graal.stubs; import static com.oracle.svm.core.cpufeature.Stubs.AMD64Features.AES_CPU_FEATURES_AMD64; +import static com.oracle.svm.core.cpufeature.Stubs.AMD64Features.GHASH_CPU_FEATURES_AMD64; import static com.oracle.svm.core.cpufeature.Stubs.AMD64Features.RUNTIME_CHECKED_CPU_FEATURES_AMD64; +import static jdk.vm.ci.amd64.AMD64.CPUFeature.CLMUL; import static jdk.vm.ci.amd64.AMD64.CPUFeature.SSE2; +import static jdk.vm.ci.amd64.AMD64.CPUFeature.SSSE3; import java.util.EnumSet; @@ -64,7 +67,8 @@ public AMD64StubForeignCallsFeature() { new StubDescriptor(ArrayRegionCompareToForeignCalls.STUBS, true, BASELINE, RUNTIME_CHECKED_CPU_FEATURES_AMD64), new 
StubDescriptor(VectorizedMismatchForeignCalls.STUB, true, BASELINE, RUNTIME_CHECKED_CPU_FEATURES_AMD64), new StubDescriptor(VectorizedMismatchForeignCalls.STUB, true, BASELINE, RUNTIME_CHECKED_CPU_FEATURES_AMD64), - new StubDescriptor(CryptoForeignCalls.STUBS, false, AES_CPU_FEATURES_AMD64, AES_CPU_FEATURES_AMD64), + new StubDescriptor(CryptoForeignCalls.AES_STUBS, false, AES_CPU_FEATURES_AMD64, AES_CPU_FEATURES_AMD64), + new StubDescriptor(CryptoForeignCalls.STUB_GHASH_PROCESS_BLOCKS, false, EnumSet.of(SSSE3, CLMUL), GHASH_CPU_FEATURES_AMD64), }); } } diff --git a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/SVMIntrinsicStubs.java b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/SVMIntrinsicStubs.java index 1c3a6bcb526d..95c6c157f782 100644 --- a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/SVMIntrinsicStubs.java +++ b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/SVMIntrinsicStubs.java @@ -34,6 +34,7 @@ import org.graalvm.compiler.replacements.nodes.ArrayIndexOfNode; import org.graalvm.compiler.replacements.nodes.ArrayRegionCompareToNode; import org.graalvm.compiler.replacements.nodes.ArrayRegionEqualsNode; +import org.graalvm.compiler.replacements.nodes.GHASHProcessBlocksNode; import org.graalvm.compiler.replacements.nodes.VectorizedMismatchNode; @GeneratedStubsHolder(targetVM = "substrate", sources = { @@ -47,6 +48,7 @@ AMD64ArrayRegionEqualsWithMaskNode.class, AMD64CalcStringAttributesNode.class, AESNode.class, + GHASHProcessBlocksNode.class, }) public final class SVMIntrinsicStubs { } From d63dbb61eff2439d08a1cf6feeb7a449c123efc9 Mon Sep 17 00:00:00 2001 From: Tom Shull Date: Mon, 29 Aug 2022 12:29:43 +0200 Subject: [PATCH 2/4] aarch64 assembler refactorings --- .../asm/aarch64/AArch64ASIMDAssembler.java | 62 ++++++++++++------- .../aarch64/AArch64ASIMDMacroAssembler.java | 15 +++++ .../aarch64/AArch64GHASHProcessBlocksOp.java | 28 ++++----- ...va => 
AArch64StubForeignCallsFeature.java} | 4 +- 4 files changed, 71 insertions(+), 38 deletions(-) rename substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/{AARCH64StubForeignCallsFeature.java => AArch64StubForeignCallsFeature.java} (95%) diff --git a/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDAssembler.java b/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDAssembler.java index 1166722f1d1f..b8fa77e2fc69 100644 --- a/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDAssembler.java +++ b/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDAssembler.java @@ -562,6 +562,7 @@ public enum ASIMDInstruction { INSGEN(0b0011 << 11), SMOV(0b0101 << 11), UMOV(0b0111 << 11), + INSELEM(0b1 << 29), /* Advanced SIMD two-register miscellaneous (C4-361). */ /* size xx */ @@ -573,7 +574,6 @@ public enum ASIMDInstruction { CMLT_ZERO(0b01010 << 12), ABS(0b01011 << 12), XTN(0b10010 << 12), - PMULL(0b1110 << 12), /* size 0x */ FCVTN(0b10110 << 12), FCVTL(0b10111 << 12), @@ -609,6 +609,7 @@ public enum ASIMDInstruction { /* Advanced SIMD three different (C4-365). 
*/ SMLAL(0b1000 << 12), SMLSL(0b1010 << 12), + PMULL(0b1110 << 12), UMLAL(UBit | 0b1000 << 12), UMLSL(UBit | 0b1010 << 12), @@ -842,19 +843,14 @@ private void permuteEncoding(ASIMDInstruction instr, ASIMDSize size, ElementSize } private void copyEncoding(ASIMDInstruction instr, boolean setQBit, ElementSize eSize, Register dst, Register src, int index) { + copyEncoding(instr, 0, setQBit, eSize, dst, src, index); + } + + private void copyEncoding(ASIMDInstruction instr, int extraEncoding, boolean setQBit, ElementSize eSize, Register dst, Register src, int index) { assert index >= 0 && index < ASIMDSize.FullReg.bytes() / eSize.bytes(); int baseEncoding = 0b0_0_0_01110000_00000_0_0000_1_00000_00000; int imm5Encoding = (index * 2 * eSize.bytes() | eSize.bytes()) << 16; - emitInt(instr.encoding | baseEncoding | qBit(setQBit) | imm5Encoding | rd(dst) | rs1(src)); - } - - private void copyEncoding(boolean setQBit, ElementSize eSize, Register dst, int indexDst, Register src, int indexSrc) { - assert indexDst >= 0 && indexDst < ASIMDSize.FullReg.bytes() / eSize.bytes(); - assert indexSrc >= 0 && indexSrc < ASIMDSize.FullReg.bytes() / eSize.bytes(); - int baseEncoding = 0b0_0_1_01110000_00000_0_0000_1_00000_00000; - int imm5Encoding = (indexDst * 2 * eSize.bytes() | eSize.bytes()) << 16; - int imm4Encoding = (indexSrc * eSize.bytes()) << 11; - emitInt(imm4Encoding | baseEncoding | qBit(setQBit) | imm5Encoding | rd(dst) | rs1(src)); + emitInt(instr.encoding | extraEncoding | baseEncoding | qBit(setQBit) | imm5Encoding | rd(dst) | rs1(src)); } private void twoRegMiscEncoding(ASIMDInstruction instr, ASIMDSize size, int eSizeEncoding, Register dst, Register src) { @@ -2083,12 +2079,16 @@ public void fsubVVV(ASIMDSize size, ElementSize eSize, Register dst, Register sr * * @param eSize size of value to duplicate. * @param dst SIMD register. - * @param indexDst offset of value to store. + * @param dstIdx offset of value to store. * @param src SIMD register. 
- * @param indexSrc offset of value to duplicate. + * @param srcIdx offset of value to duplicate. */ - public void insVV(ElementSize eSize, Register dst, int indexDst, Register src, int indexSrc) { - copyEncoding(true, eSize, dst, indexDst, src, indexSrc); + public void insXX(ElementSize eSize, Register dst, int dstIdx, Register src, int srcIdx) { + assert dstIdx >= 0 && dstIdx < ASIMDSize.FullReg.bytes() / eSize.bytes(); + assert srcIdx >= 0 && srcIdx < ASIMDSize.FullReg.bytes() / eSize.bytes(); + + int srcIdxEncoding = (srcIdx * eSize.bytes()) << 11; + copyEncoding(ASIMDInstruction.INSELEM, srcIdxEncoding, true, eSize, dst, src, dstIdx); } /** @@ -2368,23 +2368,41 @@ public void orrVVV(ASIMDSize size, Register dst, Register src1, Register src2) { } /** - * C7.2.215 Polynomial Multiply Long.
+ * C7.2.215 Polynomial Multiply Long (lower half).
+ * + * This instruction multiplies corresponding elements in the lower half of the vectors. + * + * @param srcESize source element size. Must be ElementSize.Byte or ElementSize.DoubleWord. + * @param dst SIMD register. + * @param src1 SIMD register. + * @param src2 SIMD register. + */ + public void pmullVVV(ElementSize srcESize, Register dst, Register src1, Register src2) { + assert dst.getRegisterCategory().equals(SIMD); + assert src1.getRegisterCategory().equals(SIMD); + assert src2.getRegisterCategory().equals(SIMD); + assert srcESize == ElementSize.Byte || srcESize == ElementSize.DoubleWord; + + threeDifferentEncoding(ASIMDInstruction.PMULL, false, elemSizeXX(srcESize), dst, src1, src2); + } + + /** + * C7.2.215 Polynomial Multiply Long (upper half).
* - * This instruction multiplies corresponding elements in the lower or upper half of the vectors. + * This instruction multiplies corresponding elements in the upper half of the vectors. * - * @param size source register size. - * @param elementSize source element size. Must be ElementSize.Byte or ElementSize.DoubleWord. + * @param srcESize source element size. Must be ElementSize.Byte or ElementSize.DoubleWord. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ - public void pmullVVV(ASIMDSize size, ElementSize elementSize, Register dst, Register src1, Register src2) { + public void pmull2VVV(ElementSize srcESize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD); assert src1.getRegisterCategory().equals(SIMD); assert src2.getRegisterCategory().equals(SIMD); - assert elementSize == ElementSize.Byte || elementSize == ElementSize.DoubleWord; + assert srcESize == ElementSize.Byte || srcESize == ElementSize.DoubleWord; - threeDifferentEncoding(ASIMDInstruction.PMULL, size == ASIMDSize.FullReg, elemSizeXX(elementSize), dst, src1, src2); + threeDifferentEncoding(ASIMDInstruction.PMULL, true, elemSizeXX(srcESize), dst, src1, src2); } /** diff --git a/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDMacroAssembler.java b/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDMacroAssembler.java index 6b6efd074e84..78ef5128e037 100644 --- a/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDMacroAssembler.java +++ b/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDMacroAssembler.java @@ -275,6 +275,21 @@ public void revVV(ASIMDSize size, ElementSize eSize, Register dst, Register src) } } + /** + * C7.2.200 Move vector element to another vector element.
+ *

+ * Preferred alias for insert vector element from another vector element. + * + * @param eSize size of value to duplicate. + * @param dst SIMD register. + * @param dstIdx offset of value to store. + * @param src SIMD register. + * @param srcIdx offset of value to duplicate. + */ + public void movXX(ElementSize eSize, Register dst, int dstIdx, Register src, int srcIdx) { + insXX(eSize, dst, dstIdx, src, srcIdx); + } + /** * C7.2.207 Bitwise not.
*

diff --git a/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64GHASHProcessBlocksOp.java b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64GHASHProcessBlocksOp.java index 1aba5310a1fc..ba4fda553578 100644 --- a/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64GHASHProcessBlocksOp.java +++ b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64GHASHProcessBlocksOp.java @@ -500,16 +500,16 @@ public void generate(int index) { masm.neon.extVVV(ASIMDSize.FullReg, tmp1, b, b, 0x08); break; case 1: - masm.neon.pmullVVV(ASIMDSize.FullReg, ElementSize.DoubleWord, resultHi, b, a); // A1*B1 + masm.neon.pmull2VVV(ElementSize.DoubleWord, resultHi, b, a); // A1*B1 break; case 2: masm.neon.eorVVV(ASIMDSize.FullReg, tmp1, tmp1, b); // (B1+B0) break; case 3: - masm.neon.pmullVVV(ASIMDSize.HalfReg, ElementSize.DoubleWord, resultLo, b, a); // A0*B0 + masm.neon.pmullVVV(ElementSize.DoubleWord, resultLo, b, a); // A0*B0 break; case 4: - masm.neon.pmullVVV(ASIMDSize.HalfReg, ElementSize.DoubleWord, tmp2, tmp1, a1XORa0); // (A1+A0)(B1+B0) + masm.neon.pmullVVV(ElementSize.DoubleWord, tmp2, tmp1, a1XORa0); // (A1+A0)(B1+B0) break; case 5: masm.neon.extVVV(ASIMDSize.FullReg, tmp1, resultLo, resultHi, 0x08); @@ -525,10 +525,10 @@ public void generate(int index) { break; // Register pair holds the result of carry-less multiplication case 9: - masm.neon.insVV(ElementSize.DoubleWord, resultHi, 0, tmp2, 1); + masm.neon.insXX(ElementSize.DoubleWord, resultHi, 0, tmp2, 1); break; case 10: - masm.neon.insVV(ElementSize.DoubleWord, resultLo, 1, tmp2, 0); + masm.neon.insXX(ElementSize.DoubleWord, resultLo, 1, tmp2, 0); break; default: throw GraalError.shouldNotReachHere(); @@ -620,7 +620,7 @@ public void generate(int index) { // bits we can do this with two 64-bit multiplications, lo*p and // hi*p. 
case 0: - masm.neon.pmullVVV(ASIMDSize.FullReg, ElementSize.DoubleWord, t0, hi, p); + masm.neon.pmull2VVV(ElementSize.DoubleWord, t0, hi, p); break; case 1: masm.neon.extVVV(ASIMDSize.FullReg, t1, t0, vzr, 8); @@ -635,7 +635,7 @@ public void generate(int index) { masm.neon.eorVVV(ASIMDSize.FullReg, lo, lo, t1); break; case 5: - masm.neon.pmullVVV(ASIMDSize.HalfReg, ElementSize.DoubleWord, t0, hi, p); + masm.neon.pmullVVV(ElementSize.DoubleWord, t0, hi, p); break; case 6: masm.neon.eorVVV(ASIMDSize.FullReg, result, lo, t0); @@ -718,12 +718,12 @@ private static void ghashReduce(AArch64MacroAssembler masm, // bits we can do this with two 64-bit multiplications, lo*p and // hi*p. - masm.neon.pmullVVV(ASIMDSize.FullReg, ElementSize.DoubleWord, t0, hi, p); + masm.neon.pmull2VVV(ElementSize.DoubleWord, t0, hi, p); masm.neon.extVVV(ASIMDSize.FullReg, t1, t0, vzr, 8); masm.neon.eorVVV(ASIMDSize.FullReg, hi, hi, t1); masm.neon.extVVV(ASIMDSize.FullReg, t1, vzr, t0, 8); masm.neon.eorVVV(ASIMDSize.FullReg, lo, lo, t1); - masm.neon.pmullVVV(ASIMDSize.HalfReg, ElementSize.DoubleWord, t0, hi, p); + masm.neon.pmullVVV(ElementSize.DoubleWord, t0, hi, p); masm.neon.eorVVV(ASIMDSize.FullReg, result, lo, t0); } @@ -756,10 +756,10 @@ private static void ghashMultiply(AArch64MacroAssembler masm, // B0 in b.d[0] (state) // B1 in b.d[1] masm.neon.extVVV(ASIMDSize.FullReg, tmp1, b, b, 0x08); - masm.neon.pmullVVV(ASIMDSize.FullReg, ElementSize.DoubleWord, resultHi, b, a); // A1*B1 + masm.neon.pmull2VVV(ElementSize.DoubleWord, resultHi, b, a); // A1*B1 masm.neon.eorVVV(ASIMDSize.FullReg, tmp1, tmp1, b); // (B1+B0) - masm.neon.pmullVVV(ASIMDSize.HalfReg, ElementSize.DoubleWord, resultLo, b, a); // A0*B0 - masm.neon.pmullVVV(ASIMDSize.HalfReg, ElementSize.DoubleWord, tmp2, tmp1, a1XORa0); // (A1+A0)(B1+B0) + masm.neon.pmullVVV(ElementSize.DoubleWord, resultLo, b, a); // A0*B0 + masm.neon.pmullVVV(ElementSize.DoubleWord, tmp2, tmp1, a1XORa0); // (A1+A0)(B1+B0) 
masm.neon.extVVV(ASIMDSize.FullReg, tmp1, resultLo, resultHi, 0x08); masm.neon.eorVVV(ASIMDSize.FullReg, tmp3, resultHi, resultLo); // A1*B1+A0*B0 @@ -767,7 +767,7 @@ private static void ghashMultiply(AArch64MacroAssembler masm, masm.neon.eorVVV(ASIMDSize.FullReg, tmp2, tmp2, tmp3); // Register pair holds the result of carry-less multiplication - masm.neon.insVV(ElementSize.DoubleWord, resultHi, 0, tmp2, 1); - masm.neon.insVV(ElementSize.DoubleWord, resultLo, 1, tmp2, 0); + masm.neon.insXX(ElementSize.DoubleWord, resultHi, 0, tmp2, 1); + masm.neon.insXX(ElementSize.DoubleWord, resultLo, 1, tmp2, 0); } } diff --git a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AARCH64StubForeignCallsFeature.java b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AArch64StubForeignCallsFeature.java similarity index 95% rename from substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AARCH64StubForeignCallsFeature.java rename to substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AArch64StubForeignCallsFeature.java index 0539d678eaad..ee90631c3969 100644 --- a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AARCH64StubForeignCallsFeature.java +++ b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AArch64StubForeignCallsFeature.java @@ -37,9 +37,9 @@ @AutomaticFeature @Platforms(AARCH64.class) -public class AARCH64StubForeignCallsFeature extends StubForeignCallsFeatureBase { +public class AArch64StubForeignCallsFeature extends StubForeignCallsFeatureBase { - public AARCH64StubForeignCallsFeature() { + public AArch64StubForeignCallsFeature() { super(new StubDescriptor[]{ new StubDescriptor(ArrayIndexOfForeignCalls.STUBS_AARCH64, true, EMPTY_CPU_FEATURES_AARCH64, EMPTY_CPU_FEATURES_AARCH64), new StubDescriptor(CryptoForeignCalls.AES_STUBS, false, AES_CPU_FEATURES_AARCH64, AES_CPU_FEATURES_AARCH64), From 2bf0473a2134d2a2366339e1d468128c37884d31 Mon Sep 17 00:00:00 2001 From: 
Tom Shull Date: Mon, 29 Aug 2022 13:14:05 +0200 Subject: [PATCH 3/4] consistent blocks size --- .../lir/aarch64/AArch64GHASHProcessBlocksOp.java | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64GHASHProcessBlocksOp.java b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64GHASHProcessBlocksOp.java index ba4fda553578..4096ffbfdab2 100644 --- a/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64GHASHProcessBlocksOp.java +++ b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64GHASHProcessBlocksOp.java @@ -46,6 +46,7 @@ import java.util.Arrays; +import jdk.vm.ci.aarch64.AArch64Kind; import org.graalvm.compiler.asm.Label; import org.graalvm.compiler.asm.aarch64.AArch64ASIMDAssembler.ASIMDSize; import org.graalvm.compiler.asm.aarch64.AArch64ASIMDAssembler.ElementSize; @@ -107,6 +108,11 @@ public AArch64GHASHProcessBlocksOp(LIRGeneratorTool tool, AllocatableValue state @Override public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) { + assert stateValue.getPlatformKind().equals(AArch64Kind.QWORD) : stateValue; + assert htblValue.getPlatformKind().equals(AArch64Kind.QWORD) : htblValue; + assert originalDataValue.getPlatformKind().equals(AArch64Kind.QWORD) : originalDataValue; + assert originalBlocksValue.getPlatformKind().equals(AArch64Kind.DWORD) : originalBlocksValue; + Label labelSmall = new Label(); Label labelDone = new Label(); @@ -119,7 +125,7 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) { Register blocks = asRegister(blocksValue); masm.mov(64, data, originalData); - masm.mov(64, blocks, originalBlocks); + masm.mov(32, blocks, originalBlocks); masm.compare(32, blocks, 8); masm.branchConditionally(ConditionFlag.LT, labelSmall); @@ -387,8 +393,8 @@ private static void 
ghashProcessBlocksWide(AArch64MacroAssembler masm, /* temp */v3, true).unroll(); - masm.sub(64, blocks, blocks, unrolls); - masm.compare(64, blocks, unrolls * 2); + masm.sub(32, blocks, blocks, unrolls); + masm.compare(32, blocks, unrolls * 2); masm.branchConditionally(ConditionFlag.GE, labelGHASHLoop); // Merge the #unrolls states. Note that the data for the next @@ -429,7 +435,7 @@ private static void ghashProcessBlocksWide(AArch64MacroAssembler masm, masm.neon.eorVVV(ASIMDSize.FullReg, v0, v0, offset(v0, ofs + REGISTER_STRIDE)); } - masm.sub(64, blocks, blocks, unrolls); + masm.sub(32, blocks, blocks, unrolls); // And finally bit-reverse the state back to big endian. masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v0, v0); From d19017f237c5070a54a1644041ad7d17482aa24f Mon Sep 17 00:00:00 2001 From: Yudi Zheng Date: Mon, 29 Aug 2022 21:37:10 +0200 Subject: [PATCH 4/4] Adapt to JDK upstream changes. --- .../compiler/asm/amd64/AMD64Assembler.java | 2 +- .../lir/aarch64/AArch64AESEncryptOp.java | 73 +++++++++---------- .../aarch64/AArch64GHASHProcessBlocksOp.java | 66 ++++++++--------- .../lir/amd64/AMD64EncodeArrayOp.java | 6 +- .../compiler/lir/amd64/AMD64MathCosOp.java | 13 +++- .../compiler/lir/amd64/AMD64MathExpOp.java | 8 +- .../compiler/lir/amd64/AMD64MathLog10Op.java | 8 +- .../compiler/lir/amd64/AMD64MathLogOp.java | 8 +- .../compiler/lir/amd64/AMD64MathPowOp.java | 8 +- .../compiler/lir/amd64/AMD64MathSinOp.java | 8 +- .../compiler/lir/amd64/AMD64MathTanOp.java | 8 +- .../lir/amd64/AMD64RoundFloatToIntegerOp.java | 6 +- .../lir/amd64/AMD64VectorizedMismatchOp.java | 6 +- .../lir/processor/StubPortProcessor.java | 7 +- 14 files changed, 110 insertions(+), 117 deletions(-) diff --git a/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64Assembler.java b/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64Assembler.java index 237c93bbdf1c..f4d8313a3975 100644 --- 
a/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64Assembler.java +++ b/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64Assembler.java @@ -4176,7 +4176,7 @@ public final void unpcklpd(Register dst, Register src) { } public final void xorb(Register dst, AMD64Address src) { - XOR.rmOp.emit(this, BYTE, dst, src); + XOR.byteRmOp.emit(this, BYTE, dst, src); } public final void xorl(Register dst, Register src) { diff --git a/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESEncryptOp.java b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESEncryptOp.java index 414c1ddb2f19..f034231a9476 100644 --- a/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESEncryptOp.java +++ b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESEncryptOp.java @@ -24,7 +24,6 @@ */ package org.graalvm.compiler.lir.aarch64; -import static jdk.vm.ci.aarch64.AArch64.SIMD; import static jdk.vm.ci.aarch64.AArch64.v0; import static jdk.vm.ci.aarch64.AArch64.v17; import static jdk.vm.ci.aarch64.AArch64.v18; @@ -69,13 +68,13 @@ @StubPort(path = "src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp", lineStart = 2562, lineEnd = 2592, - commit = "61e072d11c8e0cb5879bb733ed1fdd2144326bfd", + commit = "f91943c19fc0b060684a437d2c768461d54c088e", sha1 = "350e5592f4df298c7ee648581bb1e8342edf9a05") @StubPort(path = "src/hotspot/cpu/aarch64/macroAssembler_aarch64_aes.cpp", lineStart = 112, lineEnd = 283, - commit = "61e072d11c8e0cb5879bb733ed1fdd2144326bfd", - sha1 = "bb8410fff34e13647ce0411bc64de8fd279cfbff") + commit = "2fe0ce01485d7b84dc109d3d4f24bdd908c0e7cf", + sha1 = "0809579798e28fe7d2439e9ac5d5f8e23f1fcd21") // @formatter:on public final class AArch64AESEncryptOp extends AArch64LIRInstruction { @@ -126,29 +125,12 @@ public void emitCode(CompilationResultBuilder crb, 
AArch64MacroAssembler masm) { masm.ldr(32, keylen, AArch64Address.createImmediateAddress(32, IMMEDIATE_SIGNED_UNSCALED, key, lengthOffset)); aesencLoadkeys(masm, key, keylen); - // Uses expanded key in v17..v31 - // Returns encrypted values in inputs. - // If to != noreg, store value at to; likewise from - // Preserves key, keylen - // Increments from, to - // Input data in v0, v1, ... - // unrolls controls the number of times to unroll the generated function - new AESKernelGenerator(masm, 1, from, to, keylen, v0, v17).unroll(); + aesecbEncrypt(masm, from, to, keylen, v0, 1); } } - private static int indexOf(Register reg) { - assert SIMD.equals(reg.getRegisterCategory()); - for (int i = 0; i < AArch64.simdRegisters.size(); i++) { - if (reg.equals(AArch64.simdRegisters.get(i))) { - return i; - } - } - throw GraalError.shouldNotReachHere("unknown register "); - } - - static Register offset(Register base, int offset) { - return AArch64.simdRegisters.get(indexOf(base) + offset); + static Register asFloatRegister(Register base, int offset) { + return AArch64.simdRegisters.get(base.encoding + offset); } static void aesencLoadkeys(AArch64MacroAssembler masm, Register key, Register keylen) { @@ -301,10 +283,10 @@ public void generate(int index) { } break; case 2: - aesRound(data, offset(subkeys, 0)); + aesRound(data, asFloatRegister(subkeys, 0)); break; case 3: - aesRound(data, offset(subkeys, 1)); + aesRound(data, asFloatRegister(subkeys, 1)); break; case 4: if (once) { @@ -312,10 +294,10 @@ public void generate(int index) { } break; case 5: - aesRound(data, offset(subkeys, 2)); + aesRound(data, asFloatRegister(subkeys, 2)); break; case 6: - aesRound(data, offset(subkeys, 3)); + aesRound(data, asFloatRegister(subkeys, 3)); break; case 7: if (once) { @@ -323,37 +305,37 @@ public void generate(int index) { } break; case 8: - aesRound(data, offset(subkeys, 4)); + aesRound(data, asFloatRegister(subkeys, 4)); break; case 9: - aesRound(data, offset(subkeys, 5)); + 
aesRound(data, asFloatRegister(subkeys, 5)); break; case 10: - aesRound(data, offset(subkeys, 6)); + aesRound(data, asFloatRegister(subkeys, 6)); break; case 11: - aesRound(data, offset(subkeys, 7)); + aesRound(data, asFloatRegister(subkeys, 7)); break; case 12: - aesRound(data, offset(subkeys, 8)); + aesRound(data, asFloatRegister(subkeys, 8)); break; case 13: - aesRound(data, offset(subkeys, 9)); + aesRound(data, asFloatRegister(subkeys, 9)); break; case 14: - aesRound(data, offset(subkeys, 10)); + aesRound(data, asFloatRegister(subkeys, 10)); break; case 15: - aesRound(data, offset(subkeys, 11)); + aesRound(data, asFloatRegister(subkeys, 11)); break; case 16: - aesRound(data, offset(subkeys, 12)); + aesRound(data, asFloatRegister(subkeys, 12)); break; case 17: - masm.neon.aese(data, offset(subkeys, 13)); + masm.neon.aese(data, asFloatRegister(subkeys, 13)); break; case 18: - masm.neon.eorVVV(ASIMDSize.FullReg, data, data, offset(subkeys, 14)); + masm.neon.eorVVV(ASIMDSize.FullReg, data, data, asFloatRegister(subkeys, 14)); break; case 19: if (!to.equals(Register.None)) { @@ -372,7 +354,7 @@ public KernelGenerator next() { from, to, keylen, - offset(data, 1), + asFloatRegister(data, 1), subkeys, false); } @@ -382,4 +364,15 @@ public int length() { return 20; } } + + // Uses expanded key in v17..v31 + // Returns encrypted values in inputs. + // If to != noreg, store value at to; likewise from + // Preserves key, keylen + // Increments from, to + // Input data in v0, v1, ... 
+ // unrolls controls the number of times to unroll the generated function + static void aesecbEncrypt(AArch64MacroAssembler masm, Register from, Register to, Register keylen, Register data, int unrolls) { + new AESKernelGenerator(masm, unrolls, from, to, keylen, data, v17).unroll(); + } } diff --git a/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64GHASHProcessBlocksOp.java b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64GHASHProcessBlocksOp.java index 4096ffbfdab2..c76c8774c669 100644 --- a/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64GHASHProcessBlocksOp.java +++ b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64GHASHProcessBlocksOp.java @@ -42,11 +42,10 @@ import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED; import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_SIGNED_UNSCALED; import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; -import static org.graalvm.compiler.lir.aarch64.AArch64AESEncryptOp.offset; +import static org.graalvm.compiler.lir.aarch64.AArch64AESEncryptOp.asFloatRegister; import java.util.Arrays; -import jdk.vm.ci.aarch64.AArch64Kind; import org.graalvm.compiler.asm.Label; import org.graalvm.compiler.asm.aarch64.AArch64ASIMDAssembler.ASIMDSize; import org.graalvm.compiler.asm.aarch64.AArch64ASIMDAssembler.ElementSize; @@ -60,21 +59,22 @@ import org.graalvm.compiler.lir.gen.LIRGeneratorTool; import jdk.vm.ci.aarch64.AArch64; +import jdk.vm.ci.aarch64.AArch64Kind; import jdk.vm.ci.code.Register; import jdk.vm.ci.meta.AllocatableValue; import jdk.vm.ci.meta.Value; // @formatter:off @StubPort(path = "src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp", - lineStart = 5823, - lineEnd = 5957, - commit = "27af0144ea57e86d9b81c2b328fad66e4a046f61", + lineStart = 5831, + lineEnd = 5965, + 
commit = "f91943c19fc0b060684a437d2c768461d54c088e", sha1 = "f11f84b57df21c9b49473f204e11efc0e6da53d0") @StubPort(path = "src/hotspot/cpu/aarch64/macroAssembler_aarch64_aes.cpp", lineStart = 285, - lineEnd = 680, - commit = "27af0144ea57e86d9b81c2b328fad66e4a046f61", - sha1 = "087f57262da406b3d20e61d03eab5e9303dfba4c") + lineEnd = 691, + commit = "2fe0ce01485d7b84dc109d3d4f24bdd908c0e7cf", + sha1 = "75163bb4c510e3fa9f2347c5017561493d893691") // @formatter:on public final class AArch64GHASHProcessBlocksOp extends AArch64LIRInstruction { @@ -318,7 +318,7 @@ private static void ghashProcessBlocksWide(AArch64MacroAssembler masm, for (int i = 1; i < unrolls; i++) { int ofs = i * REGISTER_STRIDE; // zero each state register - masm.neon.eorVVV(ASIMDSize.FullReg, offset(v0, ofs), offset(v0, ofs), offset(v0, ofs)); + masm.neon.eorVVV(ASIMDSize.FullReg, asFloatRegister(v0, ofs), asFloatRegister(v0, ofs), asFloatRegister(v0, ofs)); } // long-swap subkeyH into a1XORa0 @@ -328,7 +328,7 @@ private static void ghashProcessBlocksWide(AArch64MacroAssembler masm, // Load #unrolls blocks of data for (int ofs = 0; ofs < unrolls * REGISTER_STRIDE; ofs += REGISTER_STRIDE) { - masm.fldr(128, offset(v2, ofs), AArch64Address.createImmediateAddress(128, IMMEDIATE_POST_INDEXED, data, 0x10)); + masm.fldr(128, asFloatRegister(v2, ofs), AArch64Address.createImmediateAddress(128, IMMEDIATE_POST_INDEXED, data, 0x10)); } // Register assignments, replicated across 4 clones, v0 ... 
v23 @@ -361,8 +361,8 @@ private static void ghashProcessBlocksWide(AArch64MacroAssembler masm, // Xor data into current state for (int ofs = 0; ofs < unrolls * REGISTER_STRIDE; ofs += REGISTER_STRIDE) { // bit-swapped data ^ bit-swapped state - masm.neon.rbitVV(ASIMDSize.FullReg, offset(v2, ofs), offset(v2, ofs)); - masm.neon.eorVVV(ASIMDSize.FullReg, offset(v2, ofs), offset(v0, ofs), offset(v2, ofs)); + masm.neon.rbitVV(ASIMDSize.FullReg, asFloatRegister(v2, ofs), asFloatRegister(v2, ofs)); + masm.neon.eorVVV(ASIMDSize.FullReg, asFloatRegister(v2, ofs), asFloatRegister(v0, ofs), asFloatRegister(v2, ofs)); } // Generate fully-unrolled multiply-reduce in two stages. @@ -405,9 +405,9 @@ private static void ghashProcessBlocksWide(AArch64MacroAssembler masm, int ofs = i * REGISTER_STRIDE; masm.fldr(128, hPrime, AArch64Address.createImmediateAddress(128, IMMEDIATE_SIGNED_UNSCALED, subkeyH, 16 * (unrolls - i - 1))); - masm.neon.rbitVV(ASIMDSize.FullReg, offset(v2, ofs), offset(v2, ofs)); + masm.neon.rbitVV(ASIMDSize.FullReg, asFloatRegister(v2, ofs), asFloatRegister(v2, ofs)); // bit-swapped data ^ bit-swapped state - masm.neon.eorVVV(ASIMDSize.FullReg, offset(v2, ofs), offset(v0, ofs), offset(v2, ofs)); + masm.neon.eorVVV(ASIMDSize.FullReg, asFloatRegister(v2, ofs), asFloatRegister(v0, ofs), asFloatRegister(v2, ofs)); masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, hPrime, hPrime); masm.neon.rbitVV(ASIMDSize.FullReg, hPrime, hPrime); @@ -416,23 +416,23 @@ private static void ghashProcessBlocksWide(AArch64MacroAssembler masm, // xor subkeyH into subkeyL (Karatsuba: (A1+A0)) masm.neon.eorVVV(ASIMDSize.FullReg, a1XORa0, a1XORa0, hPrime); ghashModmul(masm, - /* result */offset(v0, ofs), - /* resultLo */offset(v5, ofs), - /* resultHi */offset(v4, ofs), - /* b */offset(v2, ofs), + /* result */asFloatRegister(v0, ofs), + /* resultLo */asFloatRegister(v5, ofs), + /* resultHi */asFloatRegister(v4, ofs), + /* b */asFloatRegister(v2, ofs), hPrime, vzr, a1XORa0, p, - /* 
temps */offset(v1, ofs), - offset(v3, ofs), - /* reuse b */offset(v2, ofs)); + /* temps */asFloatRegister(v1, ofs), + asFloatRegister(v3, ofs), + /* reuse b */asFloatRegister(v2, ofs)); } // Then we sum the results. for (int i = 0; i < unrolls - 1; i++) { int ofs = i * REGISTER_STRIDE; - masm.neon.eorVVV(ASIMDSize.FullReg, v0, v0, offset(v0, ofs + REGISTER_STRIDE)); + masm.neon.eorVVV(ASIMDSize.FullReg, v0, v0, asFloatRegister(v0, ofs + REGISTER_STRIDE)); } masm.sub(32, blocks, blocks, unrolls); @@ -545,16 +545,16 @@ public void generate(int index) { public AArch64AESEncryptOp.KernelGenerator next() { return new GHASHMultiplyGenerator(masm, unrolls, - offset(resultLo, REGISTER_STRIDE), - offset(resultHi, REGISTER_STRIDE), - offset(b, REGISTER_STRIDE), + asFloatRegister(resultLo, REGISTER_STRIDE), + asFloatRegister(resultHi, REGISTER_STRIDE), + asFloatRegister(b, REGISTER_STRIDE), a, a1XORa0, p, vzr, - offset(tmp1, REGISTER_STRIDE), - offset(tmp2, REGISTER_STRIDE), - offset(tmp3, REGISTER_STRIDE)); + asFloatRegister(tmp1, REGISTER_STRIDE), + asFloatRegister(tmp2, REGISTER_STRIDE), + asFloatRegister(tmp3, REGISTER_STRIDE)); } @Override @@ -654,7 +654,7 @@ public void generate(int index) { if (!Register.None.equals(data) && once) { assert length() >= unrolls : "not enough room for interleaved loads"; if (index < unrolls) { - masm.fldr(128, offset(data, index * REGISTER_STRIDE), + masm.fldr(128, asFloatRegister(data, index * REGISTER_STRIDE), AArch64Address.createImmediateAddress(128, IMMEDIATE_POST_INDEXED, dataPtr, 0x10)); } } @@ -664,14 +664,14 @@ public void generate(int index) { public AArch64AESEncryptOp.KernelGenerator next() { return new GHASHReduceGenerator(masm, unrolls, - offset(result, REGISTER_STRIDE), - offset(lo, REGISTER_STRIDE), - offset(hi, REGISTER_STRIDE), + asFloatRegister(result, REGISTER_STRIDE), + asFloatRegister(lo, REGISTER_STRIDE), + asFloatRegister(hi, REGISTER_STRIDE), p, vzr, dataPtr, data, - offset(t1, REGISTER_STRIDE), + 
asFloatRegister(t1, REGISTER_STRIDE), false); } diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64EncodeArrayOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64EncodeArrayOp.java index 18da0d2bcbc8..c5451c02045d 100644 --- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64EncodeArrayOp.java +++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64EncodeArrayOp.java @@ -56,9 +56,9 @@ // @formatter:off @StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86.cpp", - lineStart = 5760, - lineEnd = 5918, - commit = "d00e7b92b4a6d33f5db6e2aedce5e058832a23de", + lineStart = 5793, + lineEnd = 5951, + commit = "926380d3b748fd591f45abc99c497abc62c52565", sha1 = "28e9e817bee0afd9e5b698c5bff3ed519e09e410") // @formatter:on @Opcode("AMD64_ENCODE_ARRAY") diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathCosOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathCosOp.java index 8783b8108264..9522140631df 100644 --- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathCosOp.java +++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathCosOp.java @@ -202,10 +202,15 @@ */ // @formatter:off @StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86_cos.cpp", - lineStart = 0, - lineEnd = 630, - commit = "e58c12e61828485bfffbc9d1b865302b93a94158", - sha1 = "cb83822ed974ba4181ff2d55869b301686e0c8c3") + lineStart = 34, + lineEnd = 612, + commit = "f3be6731d3fa4fb1b7fc42c5bcbe6a64a50eaf42", + sha1 = "9339dc67800971e1d45dd878394cb650a36ffb03") +@StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86_constants.cpp", + lineStart = 29, + lineEnd = 236, + commit = "f3be6731d3fa4fb1b7fc42c5bcbe6a64a50eaf42", + sha1 = "f89c1da45b2e91cb114e68cbe20ea6fff3bae315") // 
@formatter:on public final class AMD64MathCosOp extends AMD64MathIntrinsicUnaryOp { diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathExpOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathExpOp.java index 6e234aa2b631..a90dfb21176d 100644 --- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathExpOp.java +++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathExpOp.java @@ -88,10 +88,10 @@ */ // @formatter:off @StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86_exp.cpp", - lineStart = 0, - lineEnd = 406, - commit = "e58c12e61828485bfffbc9d1b865302b93a94158", - sha1 = "e8777563cb0f0f275a490992a36bbdf06bb4c4af") + lineStart = 35, + lineEnd = 391, + commit = "f3be6731d3fa4fb1b7fc42c5bcbe6a64a50eaf42", + sha1 = "b0e25b2f08183418668966dee8f3c4cd2318aaef") // @formatter:on public final class AMD64MathExpOp extends AMD64MathIntrinsicUnaryOp { diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathLog10Op.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathLog10Op.java index 51061a6866ed..59df96ee8aa4 100644 --- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathLog10Op.java +++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathLog10Op.java @@ -81,10 +81,10 @@ */ // @formatter:off @StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86_log10.cpp", - lineStart = 0, - lineEnd = 382, - commit = "e58c12e61828485bfffbc9d1b865302b93a94158", - sha1 = "e03b4280eebe9392433389ab16c4aa52bb01270b") + lineStart = 34, + lineEnd = 383, + commit = "f3be6731d3fa4fb1b7fc42c5bcbe6a64a50eaf42", + sha1 = "ad12a2bd143c9b4af247bfe2cd97c5aeacbfcfd1") // @formatter:on public final class AMD64MathLog10Op extends AMD64MathIntrinsicUnaryOp { diff --git 
a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathLogOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathLogOp.java index b19658ce20cb..0c894d6d60b8 100644 --- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathLogOp.java +++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathLogOp.java @@ -83,10 +83,10 @@ */ // @formatter:off @StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86_log.cpp", - lineStart = 0, - lineEnd = 362, - commit = "e58c12e61828485bfffbc9d1b865302b93a94158", - sha1 = "4fc26bdb838040042ba0a4f5c04d737705ad4a7a") + lineStart = 34, + lineEnd = 363, + commit = "f3be6731d3fa4fb1b7fc42c5bcbe6a64a50eaf42", + sha1 = "2482010183721b57ae47d581c800226ec001491a") // @formatter:on public final class AMD64MathLogOp extends AMD64MathIntrinsicUnaryOp { diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathPowOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathPowOp.java index de6dfea43f42..b7a8962a9490 100644 --- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathPowOp.java +++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathPowOp.java @@ -112,10 +112,10 @@ */ // @formatter:off @StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86_pow.cpp", - lineStart = 0, - lineEnd = 1880, - commit = "e58c12e61828485bfffbc9d1b865302b93a94158", - sha1 = "ff1905731c30cf343460e72d58537d4672b0dce2") + lineStart = 35, + lineEnd = 1881, + commit = "f3be6731d3fa4fb1b7fc42c5bcbe6a64a50eaf42", + sha1 = "45d182416a75a945c13990e76ec07c604c78825a") // @formatter:on public final class AMD64MathPowOp extends AMD64MathIntrinsicBinaryOp { diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathSinOp.java 
b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathSinOp.java index a85df4bbe948..689d0ac58301 100644 --- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathSinOp.java +++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathSinOp.java @@ -205,10 +205,10 @@ */ // @formatter:off @StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86_sin.cpp", - lineStart = 0, - lineEnd = 848, - commit = "e58c12e61828485bfffbc9d1b865302b93a94158", - sha1 = "4ac9bd6f8b98df9a93ab8ef7de250421605b323c") + lineStart = 35, + lineEnd = 636, + commit = "f3be6731d3fa4fb1b7fc42c5bcbe6a64a50eaf42", + sha1 = "35e247db6760c377bb6694dfe98559b8b2eaf8c0") // @formatter:on public final class AMD64MathSinOp extends AMD64MathIntrinsicUnaryOp { diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathTanOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathTanOp.java index d4c65b7f1641..14098c1d21cf 100644 --- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathTanOp.java +++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathTanOp.java @@ -131,10 +131,10 @@ */ // @formatter:off @StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86_tan.cpp", - lineStart = 0, - lineEnd = 1059, - commit = "e58c12e61828485bfffbc9d1b865302b93a94158", - sha1 = "1f1f3a6d2437b250c0d5b13e596d9ed5a14c869e") + lineStart = 34, + lineEnd = 1020, + commit = "f3be6731d3fa4fb1b7fc42c5bcbe6a64a50eaf42", + sha1 = "8a617c23d7eb9c1687a19b01d15b661cd636fe2e") // @formatter:on public final class AMD64MathTanOp extends AMD64MathIntrinsicUnaryOp { diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64RoundFloatToIntegerOp.java 
b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64RoundFloatToIntegerOp.java index ef2a474c7f19..150c407507d9 100644 --- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64RoundFloatToIntegerOp.java +++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64RoundFloatToIntegerOp.java @@ -49,9 +49,9 @@ */ // @formatter:off @StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86.cpp", - lineStart = 9210, - lineEnd = 9306, - commit = "d00e7b92b4a6d33f5db6e2aedce5e058832a23de", + lineStart = 9243, + lineEnd = 9339, + commit = "926380d3b748fd591f45abc99c497abc62c52565", sha1 = "7bb09de1deee91732af6a55f527c53eb33dec489") @StubPort(path = "src/hotspot/cpu/x86/stubGenerator_x86_64.cpp", lineStart = 641, diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64VectorizedMismatchOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64VectorizedMismatchOp.java index 0e62e3ca18f1..03ab2c118296 100644 --- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64VectorizedMismatchOp.java +++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64VectorizedMismatchOp.java @@ -63,9 +63,9 @@ */ // @formatter:off @StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86.cpp", - lineStart = 6380, - lineEnd = 6598, - commit = "d00e7b92b4a6d33f5db6e2aedce5e058832a23de", + lineStart = 6413, + lineEnd = 6631, + commit = "926380d3b748fd591f45abc99c497abc62c52565", sha1 = "128d88224b8fc7fa9283072966a28c14fdc1eda5") // @formatter:on @Opcode("VECTORIZED_MISMATCH") diff --git a/compiler/src/org.graalvm.compiler.lir.processor/src/org/graalvm/compiler/lir/processor/StubPortProcessor.java b/compiler/src/org.graalvm.compiler.lir.processor/src/org/graalvm/compiler/lir/processor/StubPortProcessor.java index 6446a388a0b1..b6d249f7fcf2 100644 --- 
a/compiler/src/org.graalvm.compiler.lir.processor/src/org/graalvm/compiler/lir/processor/StubPortProcessor.java +++ b/compiler/src/org.graalvm.compiler.lir.processor/src/org/graalvm/compiler/lir/processor/StubPortProcessor.java @@ -130,7 +130,7 @@ private static int find(Proxy proxy, String oldUrl, String newUrl, int lineStart String oldSnippet = oldUrlIn.lines().skip(lineStart).limit(lineEnd - lineStart).collect(Collectors.joining("\n")); int newLineStart = Math.max(0, lineStart - SEARCH_RANGE); int newLineEnd = lineEnd + SEARCH_RANGE; - String newFullFile = newUrlIn.lines().skip(newLineStart).limit(newLineEnd - lineStart).collect(Collectors.joining("\n")); + String newFullFile = newUrlIn.lines().skip(newLineStart).limit(newLineEnd - newLineStart).collect(Collectors.joining("\n")); int idx = newFullFile.indexOf(oldSnippet); if (idx != -1) { return newLineStart + newFullFile.substring(0, idx).split("\n").length; @@ -170,11 +170,6 @@ protected boolean doProcess(Set annotations, RoundEnviron proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxyURI.getHost(), proxyURI.getPort())); } - if (proxyEnv != null) { - URI proxyURI = new URI(System.getenv(HTTPS_PROXY_ENV_VAR)); - proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxyURI.getHost(), proxyURI.getPort())); - } - for (Element element : roundEnv.getElementsAnnotatedWith(tStubPort)) { compareDigest(md, getAnnotation(element, tStubPort.asType()), element, proxy); }