diff --git a/compiler/mx.compiler/suite.py b/compiler/mx.compiler/suite.py
index 3569b46ee659..4afacff8be1b 100644
--- a/compiler/mx.compiler/suite.py
+++ b/compiler/mx.compiler/suite.py
@@ -567,7 +567,8 @@
"dependencies" : [
"JVMCI_HOTSPOT",
"org.graalvm.compiler.api.runtime",
- "org.graalvm.compiler.replacements",
+ "org.graalvm.compiler.replacements.amd64",
+ "org.graalvm.compiler.replacements.aarch64",
"org.graalvm.compiler.printer",
"org.graalvm.compiler.runtime",
],
@@ -635,7 +636,6 @@
"dependencies" : [
"org.graalvm.compiler.core.aarch64",
"org.graalvm.compiler.hotspot",
- "org.graalvm.compiler.replacements.aarch64",
],
"requires" : [
"jdk.unsupported" # sun.misc.Unsafe
@@ -653,7 +653,6 @@
"sourceDirs" : ["src"],
"dependencies" : [
"org.graalvm.compiler.core.amd64",
- "org.graalvm.compiler.replacements.amd64",
"org.graalvm.compiler.hotspot",
],
"requiresConcealed" : {
diff --git a/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDAssembler.java b/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDAssembler.java
index ec4527099aef..b8fa77e2fc69 100644
--- a/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDAssembler.java
+++ b/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDAssembler.java
@@ -562,6 +562,7 @@ public enum ASIMDInstruction {
INSGEN(0b0011 << 11),
SMOV(0b0101 << 11),
UMOV(0b0111 << 11),
+ INSELEM(0b1 << 29),
/* Advanced SIMD two-register miscellaneous (C4-361). */
/* size xx */
@@ -590,6 +591,8 @@ public enum ASIMDInstruction {
NEG(UBit | 0b01011 << 12),
/* UBit 1, size 00 */
NOT(UBit | 0b00101 << 12),
+ /* UBit 1, size 01 */
+ RBIT(UBit | 0b00101 << 12),
/* UBit 1, size 1x */
FCMGE_ZERO(UBit | 0b01100 << 12),
FCMLE_ZERO(UBit | 0b01101 << 12),
@@ -606,6 +609,7 @@ public enum ASIMDInstruction {
/* Advanced SIMD three different (C4-365). */
SMLAL(0b1000 << 12),
SMLSL(0b1010 << 12),
+ PMULL(0b1110 << 12),
UMLAL(UBit | 0b1000 << 12),
UMLSL(UBit | 0b1010 << 12),
@@ -839,10 +843,14 @@ private void permuteEncoding(ASIMDInstruction instr, ASIMDSize size, ElementSize
}
private void copyEncoding(ASIMDInstruction instr, boolean setQBit, ElementSize eSize, Register dst, Register src, int index) {
+ copyEncoding(instr, 0, setQBit, eSize, dst, src, index);
+ }
+
+ private void copyEncoding(ASIMDInstruction instr, int extraEncoding, boolean setQBit, ElementSize eSize, Register dst, Register src, int index) {
assert index >= 0 && index < ASIMDSize.FullReg.bytes() / eSize.bytes();
int baseEncoding = 0b0_0_0_01110000_00000_0_0000_1_00000_00000;
int imm5Encoding = (index * 2 * eSize.bytes() | eSize.bytes()) << 16;
- emitInt(instr.encoding | baseEncoding | qBit(setQBit) | imm5Encoding | rd(dst) | rs1(src));
+ emitInt(instr.encoding | extraEncoding | baseEncoding | qBit(setQBit) | imm5Encoding | rd(dst) | rs1(src));
}
private void twoRegMiscEncoding(ASIMDInstruction instr, ASIMDSize size, int eSizeEncoding, Register dst, Register src) {
@@ -2063,6 +2071,26 @@ public void fsubVVV(ASIMDSize size, ElementSize eSize, Register dst, Register sr
threeSameEncoding(ASIMDInstruction.FSUB, size, elemSize1X(eSize), dst, src1, src2);
}
+ /**
+ * C7.2.175 Insert vector element from another vector element.
+ *
+ * This instruction copies the vector element of the source register to the specified vector
+ * element of the destination register.
+ *
+ * @param eSize size of value to duplicate.
+ * @param dst SIMD register.
+ * @param dstIdx offset of value to store.
+ * @param src SIMD register.
+ * @param srcIdx offset of value to duplicate.
+ */
+ public void insXX(ElementSize eSize, Register dst, int dstIdx, Register src, int srcIdx) {
+ assert dstIdx >= 0 && dstIdx < ASIMDSize.FullReg.bytes() / eSize.bytes();
+ assert srcIdx >= 0 && srcIdx < ASIMDSize.FullReg.bytes() / eSize.bytes();
+
+ int srcIdxEncoding = (srcIdx * eSize.bytes()) << 11;
+ copyEncoding(ASIMDInstruction.INSELEM, srcIdxEncoding, true, eSize, dst, src, dstIdx);
+ }
+
/**
* C7.2.176 Insert vector element from general-purpose register.
*
@@ -2339,6 +2367,59 @@ public void orrVVV(ASIMDSize size, Register dst, Register src1, Register src2) {
threeSameEncoding(ASIMDInstruction.ORR, size, elemSize10, dst, src1, src2);
}
+ /**
+ * C7.2.215 Polynomial Multiply Long (lower half).
+ *
+ * This instruction multiplies corresponding elements in the lower half of the vectors.
+ *
+ * @param srcESize source element size. Must be ElementSize.Byte or ElementSize.DoubleWord.
+ * @param dst SIMD register.
+ * @param src1 SIMD register.
+ * @param src2 SIMD register.
+ */
+ public void pmullVVV(ElementSize srcESize, Register dst, Register src1, Register src2) {
+ assert dst.getRegisterCategory().equals(SIMD);
+ assert src1.getRegisterCategory().equals(SIMD);
+ assert src2.getRegisterCategory().equals(SIMD);
+ assert srcESize == ElementSize.Byte || srcESize == ElementSize.DoubleWord;
+
+ threeDifferentEncoding(ASIMDInstruction.PMULL, false, elemSizeXX(srcESize), dst, src1, src2);
+ }
+
+ /**
+ * C7.2.215 Polynomial Multiply Long (upper half).
+ *
+ * This instruction multiplies corresponding elements in the upper half of the vectors.
+ *
+ * @param srcESize source element size. Must be ElementSize.Byte or ElementSize.DoubleWord.
+ * @param dst SIMD register.
+ * @param src1 SIMD register.
+ * @param src2 SIMD register.
+ */
+ public void pmull2VVV(ElementSize srcESize, Register dst, Register src1, Register src2) {
+ assert dst.getRegisterCategory().equals(SIMD);
+ assert src1.getRegisterCategory().equals(SIMD);
+ assert src2.getRegisterCategory().equals(SIMD);
+ assert srcESize == ElementSize.Byte || srcESize == ElementSize.DoubleWord;
+
+ threeDifferentEncoding(ASIMDInstruction.PMULL, true, elemSizeXX(srcESize), dst, src1, src2);
+ }
+
+ /**
+ * C7.2.218 Reverse Bit order.
+ * This instruction reverses the bits in each byte.
+ *
+ * @param size register size.
+ * @param dst SIMD register.
+ * @param src SIMD register.
+ */
+ public void rbitVV(ASIMDSize size, Register dst, Register src) {
+ assert dst.getRegisterCategory().equals(SIMD);
+ assert src.getRegisterCategory().equals(SIMD);
+
+ twoRegMiscEncoding(ASIMDInstruction.RBIT, size, elemSize01, dst, src);
+ }
+
/**
* C7.2.219 Reverse elements in 16-bit halfwords.
* This instruction reverses the order of 8-bit elements in each halfword.
diff --git a/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDMacroAssembler.java b/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDMacroAssembler.java
index 6b6efd074e84..78ef5128e037 100644
--- a/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDMacroAssembler.java
+++ b/compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDMacroAssembler.java
@@ -275,6 +275,21 @@ public void revVV(ASIMDSize size, ElementSize eSize, Register dst, Register src)
}
}
+ /**
+ * C7.2.200 Move vector element to another vector element.
+ *
+ * Preferred alias for insert vector element from another vector element.
+ *
+ * @param eSize size of value to duplicate.
+ * @param dst SIMD register.
+ * @param dstIdx offset of value to store.
+ * @param src SIMD register.
+ * @param srcIdx offset of value to duplicate.
+ */
+ public void movXX(ElementSize eSize, Register dst, int dstIdx, Register src, int srcIdx) {
+ insXX(eSize, dst, dstIdx, src, srcIdx);
+ }
+
/**
* C7.2.207 Bitwise not.
*
diff --git a/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64Assembler.java b/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64Assembler.java
index 0aa015ff8225..f4d8313a3975 100644
--- a/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64Assembler.java
+++ b/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64Assembler.java
@@ -84,8 +84,8 @@
import java.util.EnumSet;
import org.graalvm.compiler.asm.Label;
-import org.graalvm.compiler.core.common.Stride;
import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
+import org.graalvm.compiler.core.common.Stride;
import org.graalvm.compiler.core.common.calc.Condition;
import org.graalvm.compiler.debug.GraalError;
import org.graalvm.compiler.options.Option;
@@ -1073,6 +1073,7 @@ private enum VEXOpAssertion {
MASK_NULL_XMM_AVX512BW_VL(CPUFeature.AVX512VL, CPUFeature.AVX512VL, null, EVEXFeatureAssertion.AVX512F_BW_VL, MASK, null, XMM, null),
MASK_NULL_XMM_AVX512DQ_VL(CPUFeature.AVX512VL, CPUFeature.AVX512VL, null, EVEXFeatureAssertion.AVX512F_DQ_VL, MASK, null, XMM, null),
MASK_XMM_XMM_AVX512F_VL(CPUFeature.AVX512VL, CPUFeature.AVX512VL, null, EVEXFeatureAssertion.AVX512F_VL, MASK, XMM, XMM, null),
+ AVX1_128ONLY_CLMUL(CPUFeature.AVX, null, CPUFeature.CLMUL, null, XMM, XMM, XMM, XMM),
AVX1_128ONLY_AES(CPUFeature.AVX, null, CPUFeature.AES, null, XMM, XMM, XMM, XMM);
private final CPUFeature l128feature;
@@ -1989,6 +1990,29 @@ public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, i
}
}
+ public static final class VexShiftImmOp extends VexOp implements VexRRIOp {
+ // @formatter:off
+ public static final VexShiftImmOp VPSLLDQ = new VexShiftImmOp("VPSLLDQ", P_66, M_0F, WIG, 0x73, 7, VEXOpAssertion.AVX1_AVX2_AVX512BW_VL, EVEXTuple.FVM, WIG);
+ public static final VexShiftImmOp VPSRLDQ = new VexShiftImmOp("VPSRLDQ", P_66, M_0F, WIG, 0x73, 3, VEXOpAssertion.AVX1_AVX2_AVX512BW_VL, EVEXTuple.FVM, WIG);
+ // @formatter:on
+
+ private final int r;
+
+ private VexShiftImmOp(String opcode, int pp, int mmmmm, int w, int op, int r, VEXOpAssertion assertion, EVEXTuple evexTuple, int wEvex) {
+ super(opcode, pp, mmmmm, w, op, assertion, evexTuple, wEvex);
+ this.r = r;
+ }
+
+ @Override
+ public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
+ GraalError.guarantee(assertion.check(asm.getFeatures(), size, null, dst, src), "emitting invalid instruction");
+ asm.vexPrefix(null, dst, src, size, pp, mmmmm, w, wEvex, false, assertion.l128feature, assertion.l256feature);
+ asm.emitByte(op);
+ asm.emitModRM(r, src);
+ asm.emitByte(imm8);
+ }
+ }
+
/**
* Masked (i.e., conditional) SIMD loads and stores.
*/
@@ -2137,6 +2161,8 @@ public static final class VexRVMIOp extends VexOp {
// AVX2 128-bit permutation
public static final VexRVMIOp VPERM2I128 = new VexRVMIOp("VPERM2I128", P_66, M_0F3A, W0, 0x46, VEXOpAssertion.AVX2_256ONLY);
+ // Carry-Less Multiplication Quadword
+ public static final VexRVMIOp VPCLMULQDQ = new VexRVMIOp("VPCLMULQDQ", P_66, M_0F3A, WIG, 0x44, VEXOpAssertion.AVX1_128ONLY_CLMUL);
// Packed Align Right
public static final VexRVMIOp VPALIGNR = new VexRVMIOp("VPALIGNR", P_66, M_0F3A, WIG, 0x0F, VEXOpAssertion.AVX1_AVX2_AVX512BW_VL, EVEXTuple.FVM, WIG);
@@ -3839,6 +3865,16 @@ public final void pslld(Register dst, int imm8) {
emitByte(imm8 & 0xFF);
}
+ public final void pslldq(Register dst, int imm8) {
+ assert isUByte(imm8) : "invalid value";
+ assert inRC(XMM, dst);
+ // XMM7 is for /7 encoding: 66 0F 73 /7 ib
+ simdPrefix(AMD64.xmm7, dst, dst, PD, P_0F, false);
+ emitByte(0x73);
+ emitModRM(7, dst);
+ emitByte(imm8 & 0xFF);
+ }
+
public final void psllq(Register dst, Register shift) {
assert inRC(XMM, dst) && inRC(XMM, shift);
simdPrefix(dst, dst, shift, PD, P_0F, false);
@@ -3969,6 +4005,39 @@ public final void punpcklbw(Register dst, Register src) {
emitModRM(dst, src);
}
+ public final void pclmulqdq(Register dst, Register src, int imm8) {
+ assert supports(CPUFeature.CLMUL);
+ assert inRC(XMM, dst) && inRC(XMM, src);
+ simdPrefix(dst, dst, src, PD, P_0F3A, false);
+ emitByte(0x44);
+ emitModRM(dst, src);
+ emitByte(imm8);
+ }
+
+ public final void vpshufb(Register dst, Register src1, Register src2, AVXSize size) {
+ VexRVMOp.VPSHUFB.emit(this, size, dst, src1, src2);
+ }
+
+ public final void vpclmulqdq(Register dst, Register nds, Register src, int imm8) {
+ VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, imm8);
+ }
+
+ public final void vpclmullqlqdq(Register dst, Register nds, Register src) {
+ VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, 0x00);
+ }
+
+ public final void vpclmulhqlqdq(Register dst, Register nds, Register src) {
+ VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, 0x01);
+ }
+
+ public final void vpclmullqhqdq(Register dst, Register nds, Register src) {
+ VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, 0x10);
+ }
+
+ public final void vpclmulhqhqdq(Register dst, Register nds, Register src) {
+ VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, 0x11);
+ }
+
public final void rcpps(Register dst, Register src) {
assert inRC(XMM, dst) && inRC(XMM, src);
simdPrefix(dst, Register.None, src, PS, P_0F, false);
@@ -4106,6 +4175,10 @@ public final void unpcklpd(Register dst, Register src) {
emitModRM(dst, src);
}
+ public final void xorb(Register dst, AMD64Address src) {
+ XOR.byteRmOp.emit(this, BYTE, dst, src);
+ }
+
public final void xorl(Register dst, Register src) {
XOR.rmOp.emit(this, DWORD, dst, src);
}
@@ -4666,6 +4739,17 @@ public final void call() {
emitInt(0);
}
+ public final void call(Label l) {
+ if (l.isBound()) {
+ emitByte(0xE8);
+ emitInt(l.position());
+ } else {
+ l.addPatchAt(position(), this);
+ emitByte(0xE8);
+ emitInt(0);
+ }
+ }
+
public final void call(Register src) {
prefix(src);
emitByte(0xFF);
@@ -4894,8 +4978,8 @@ public void clflushopt(AMD64Address adr) {
emitOperandHelper(7, adr, 0);
}
- public final void vpand(Register dst, Register nds, Register src) {
- VexRVMOp.VPAND.emit(this, AVXSize.YMM, dst, nds, src);
+ public final void vpand(Register dst, Register nds, Register src, AVXSize size) {
+ VexRVMOp.VPAND.emit(this, size, dst, nds, src);
}
public final void vpandn(Register dst, Register nds, Register src) {
@@ -4906,16 +4990,16 @@ public final void vpor(Register dst, Register nds, Register src) {
VexRVMOp.VPOR.emit(this, AVXSize.YMM, dst, nds, src);
}
- public final void vptest(Register dst, Register src) {
- VexRMOp.VPTEST.emit(this, AVXSize.YMM, dst, src);
+ public final void vptest(Register dst, Register src, AVXSize size) {
+ VexRMOp.VPTEST.emit(this, size, dst, src);
}
- public final void vpxor(Register dst, Register nds, Register src) {
- VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
+ public final void vpxor(Register dst, Register nds, Register src, AVXSize size) {
+ VexRVMOp.VPXOR.emit(this, size, dst, nds, src);
}
- public final void vpxor(Register dst, Register nds, AMD64Address src) {
- VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
+ public final void vpxor(Register dst, Register nds, AMD64Address src, AVXSize size) {
+ VexRVMOp.VPXOR.emit(this, size, dst, nds, src);
}
public final void vpsllw(Register dst, Register src, int imm8) {
@@ -4926,12 +5010,20 @@ public final void vpsrlw(Register dst, Register src, int imm8) {
VexShiftOp.VPSRLW.emit(this, AVXSize.YMM, dst, src, imm8);
}
- public final void vpslld(Register dst, Register src, int imm8) {
- VexShiftOp.VPSLLD.emit(this, AVXSize.YMM, dst, src, imm8);
+ public final void vpslld(Register dst, Register src, int imm8, AVXSize size) {
+ VexShiftOp.VPSLLD.emit(this, size, dst, src, imm8);
+ }
+
+ public final void vpslldq(Register dst, Register src, int imm8, AVXSize size) {
+ VexShiftImmOp.VPSLLDQ.emit(this, size, dst, src, imm8);
+ }
+
+ public final void vpsrld(Register dst, Register src, int imm8, AVXSize size) {
+ VexShiftOp.VPSRLD.emit(this, size, dst, src, imm8);
}
- public final void vpsrld(Register dst, Register src, int imm8) {
- VexShiftOp.VPSRLD.emit(this, AVXSize.YMM, dst, src, imm8);
+ public final void vpsrldq(Register dst, Register src, int imm8, AVXSize size) {
+ VexShiftImmOp.VPSRLDQ.emit(this, size, dst, src, imm8);
}
public final void vpcmpeqb(Register dst, Register src1, Register src2) {
diff --git a/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64MacroAssembler.java b/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64MacroAssembler.java
index 70b15451686b..b95222cc0306 100644
--- a/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64MacroAssembler.java
+++ b/compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64MacroAssembler.java
@@ -600,6 +600,10 @@ public final void testlAndJcc(Register src, int imm32, ConditionFlag cc, Label b
applyMIOpAndJcc(AMD64MIOp.TEST, DWORD, src, imm32, cc, branchTarget, isShortJmp, false, null);
}
+ public final void testqAndJcc(Register src, int imm32, ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
+ applyMIOpAndJcc(AMD64MIOp.TEST, QWORD, src, imm32, cc, branchTarget, isShortJmp, false, null);
+ }
+
public final void testAndJcc(OperandSize size, AMD64Address src, int imm32, ConditionFlag cc, Label branchTarget, boolean isShortJmp, IntConsumer applyBeforeFusedPair) {
applyMIOpAndJcc(AMD64MIOp.TEST, size, src, imm32, cc, branchTarget, isShortJmp, false, applyBeforeFusedPair);
}
diff --git a/compiler/src/org.graalvm.compiler.core.aarch64/src/org/graalvm/compiler/core/aarch64/AArch64LIRGenerator.java b/compiler/src/org.graalvm.compiler.core.aarch64/src/org/graalvm/compiler/core/aarch64/AArch64LIRGenerator.java
index 753f83349f44..5f44068d7120 100644
--- a/compiler/src/org.graalvm.compiler.core.aarch64/src/org/graalvm/compiler/core/aarch64/AArch64LIRGenerator.java
+++ b/compiler/src/org.graalvm.compiler.core.aarch64/src/org/graalvm/compiler/core/aarch64/AArch64LIRGenerator.java
@@ -70,6 +70,7 @@
import org.graalvm.compiler.lir.aarch64.AArch64ControlFlow.RangeTableSwitchOp;
import org.graalvm.compiler.lir.aarch64.AArch64ControlFlow.StrategySwitchOp;
import org.graalvm.compiler.lir.aarch64.AArch64EncodeArrayOp;
+import org.graalvm.compiler.lir.aarch64.AArch64GHASHProcessBlocksOp;
import org.graalvm.compiler.lir.aarch64.AArch64Move;
import org.graalvm.compiler.lir.aarch64.AArch64Move.MembarOp;
import org.graalvm.compiler.lir.aarch64.AArch64PauseOp;
@@ -581,6 +582,11 @@ public void emitAESDecrypt(Value from, Value to, Value key) {
append(new AArch64AESDecryptOp(asAllocatable(from), asAllocatable(to), asAllocatable(key), getArrayLengthOffset() - getArrayBaseOffset(JavaKind.Int)));
}
+ @Override
+ public void emitGHASHProcessBlocks(Value state, Value hashSubkey, Value data, Value blocks) {
+ append(new AArch64GHASHProcessBlocksOp(this, asAllocatable(state), asAllocatable(hashSubkey), asAllocatable(data), asAllocatable(blocks)));
+ }
+
@Override
public void emitStringLatin1Inflate(Value src, Value dst, Value len) {
append(new AArch64StringLatin1InflateOp(this, asAllocatable(src), asAllocatable(dst), asAllocatable(len)));
diff --git a/compiler/src/org.graalvm.compiler.core.amd64/src/org/graalvm/compiler/core/amd64/AMD64LIRGenerator.java b/compiler/src/org.graalvm.compiler.core.amd64/src/org/graalvm/compiler/core/amd64/AMD64LIRGenerator.java
index 29e5a4b98a32..f60a851631f0 100644
--- a/compiler/src/org.graalvm.compiler.core.amd64/src/org/graalvm/compiler/core/amd64/AMD64LIRGenerator.java
+++ b/compiler/src/org.graalvm.compiler.core.amd64/src/org/graalvm/compiler/core/amd64/AMD64LIRGenerator.java
@@ -101,6 +101,7 @@
import org.graalvm.compiler.lir.amd64.AMD64ControlFlow.TestByteBranchOp;
import org.graalvm.compiler.lir.amd64.AMD64ControlFlow.TestConstBranchOp;
import org.graalvm.compiler.lir.amd64.AMD64EncodeArrayOp;
+import org.graalvm.compiler.lir.amd64.AMD64GHASHProcessBlocksOp;
import org.graalvm.compiler.lir.amd64.AMD64HasNegativesOp;
import org.graalvm.compiler.lir.amd64.AMD64LFenceOp;
import org.graalvm.compiler.lir.amd64.AMD64Move;
@@ -761,6 +762,11 @@ public void emitAESDecrypt(Value from, Value to, Value key) {
append(new AMD64AESDecryptOp(this, asAllocatable(from), asAllocatable(to), asAllocatable(key), getArrayLengthOffset() - getArrayBaseOffset(JavaKind.Int)));
}
+ @Override
+ public void emitGHASHProcessBlocks(Value state, Value hashSubkey, Value data, Value blocks) {
+ append(new AMD64GHASHProcessBlocksOp(this, asAllocatable(state), asAllocatable(hashSubkey), asAllocatable(data), asAllocatable(blocks)));
+ }
+
@SuppressWarnings("unchecked")
 protected boolean supports(EnumSet<?> runtimeCheckedCPUFeatures, CPUFeature feature) {
assert runtimeCheckedCPUFeatures == null || runtimeCheckedCPUFeatures.isEmpty() || runtimeCheckedCPUFeatures.iterator().next() instanceof CPUFeature;
diff --git a/compiler/src/org.graalvm.compiler.hotspot.aarch64/src/org/graalvm/compiler/hotspot/aarch64/AArch64HotSpotForeignCallsProvider.java b/compiler/src/org.graalvm.compiler.hotspot.aarch64/src/org/graalvm/compiler/hotspot/aarch64/AArch64HotSpotForeignCallsProvider.java
index 99b27bf70112..2a2af70cc3e2 100644
--- a/compiler/src/org.graalvm.compiler.hotspot.aarch64/src/org/graalvm/compiler/hotspot/aarch64/AArch64HotSpotForeignCallsProvider.java
+++ b/compiler/src/org.graalvm.compiler.hotspot.aarch64/src/org/graalvm/compiler/hotspot/aarch64/AArch64HotSpotForeignCallsProvider.java
@@ -32,6 +32,7 @@
import static org.graalvm.compiler.hotspot.HotSpotForeignCallLinkage.JUMP_ADDRESS;
import static org.graalvm.compiler.hotspot.HotSpotForeignCallLinkage.RegisterEffect.COMPUTES_REGISTERS_KILLED;
import static org.graalvm.compiler.hotspot.HotSpotForeignCallLinkage.RegisterEffect.DESTROYS_ALL_CALLER_SAVE_REGISTERS;
+import static org.graalvm.compiler.hotspot.meta.HotSpotForeignCallDescriptor.Reexecutability.NOT_REEXECUTABLE;
import static org.graalvm.compiler.hotspot.meta.HotSpotForeignCallDescriptor.Reexecutability.REEXECUTABLE;
import static org.graalvm.compiler.hotspot.meta.HotSpotForeignCallDescriptor.Transition.LEAF;
@@ -43,9 +44,12 @@
import org.graalvm.compiler.hotspot.meta.HotSpotProviders;
import org.graalvm.compiler.hotspot.stubs.IntrinsicStubsGen;
import org.graalvm.compiler.options.OptionValues;
+import org.graalvm.compiler.replacements.aarch64.AArch64GraphBuilderPlugins;
import org.graalvm.compiler.replacements.nodes.ArrayIndexOfForeignCalls;
+import org.graalvm.compiler.replacements.nodes.CryptoForeignCalls;
import org.graalvm.compiler.word.WordTypes;
+import jdk.vm.ci.aarch64.AArch64;
import jdk.vm.ci.code.CallingConvention;
import jdk.vm.ci.code.CodeCacheProvider;
import jdk.vm.ci.code.RegisterValue;
@@ -82,6 +86,17 @@ public void initialize(HotSpotProviders providers, OptionValues options) {
link(new IntrinsicStubsGen(options, providers, registerStubCall(descriptor.getSignature(), LEAF, REEXECUTABLE, COMPUTES_REGISTERS_KILLED, NO_LOCATIONS)));
}
+ if (AArch64GraphBuilderPlugins.supportsAESPlugins((AArch64) target.arch)) {
+ for (ForeignCallDescriptor stub : CryptoForeignCalls.AES_STUBS) {
+ link(new IntrinsicStubsGen(options, providers, registerStubCall(stub.getSignature(), LEAF, NOT_REEXECUTABLE, COMPUTES_REGISTERS_KILLED, stub.getKilledLocations())));
+ }
+ }
+
+ if (AArch64GraphBuilderPlugins.supportsGHASHPlugins((AArch64) target.arch)) {
+ link(new IntrinsicStubsGen(options, providers, registerStubCall(CryptoForeignCalls.STUB_GHASH_PROCESS_BLOCKS.getSignature(),
+ LEAF, NOT_REEXECUTABLE, COMPUTES_REGISTERS_KILLED, CryptoForeignCalls.STUB_GHASH_PROCESS_BLOCKS.getKilledLocations())));
+ }
+
super.initialize(providers, options);
}
diff --git a/compiler/src/org.graalvm.compiler.hotspot.amd64/src/org/graalvm/compiler/hotspot/amd64/AMD64HotSpotForeignCallsProvider.java b/compiler/src/org.graalvm.compiler.hotspot.amd64/src/org/graalvm/compiler/hotspot/amd64/AMD64HotSpotForeignCallsProvider.java
index 723b56c8fdd9..636bdcf0422d 100644
--- a/compiler/src/org.graalvm.compiler.hotspot.amd64/src/org/graalvm/compiler/hotspot/amd64/AMD64HotSpotForeignCallsProvider.java
+++ b/compiler/src/org.graalvm.compiler.hotspot.amd64/src/org/graalvm/compiler/hotspot/amd64/AMD64HotSpotForeignCallsProvider.java
@@ -58,14 +58,17 @@
import org.graalvm.compiler.options.OptionValues;
import org.graalvm.compiler.replacements.amd64.AMD64ArrayEqualsWithMaskForeignCalls;
import org.graalvm.compiler.replacements.amd64.AMD64CalcStringAttributesForeignCalls;
+import org.graalvm.compiler.replacements.amd64.AMD64GraphBuilderPlugins;
import org.graalvm.compiler.replacements.nodes.ArrayCompareToForeignCalls;
import org.graalvm.compiler.replacements.nodes.ArrayCopyWithConversionsForeignCalls;
import org.graalvm.compiler.replacements.nodes.ArrayEqualsForeignCalls;
import org.graalvm.compiler.replacements.nodes.ArrayIndexOfForeignCalls;
import org.graalvm.compiler.replacements.nodes.ArrayRegionCompareToForeignCalls;
+import org.graalvm.compiler.replacements.nodes.CryptoForeignCalls;
import org.graalvm.compiler.replacements.nodes.VectorizedMismatchForeignCalls;
import org.graalvm.compiler.word.WordTypes;
+import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.code.CallingConvention;
import jdk.vm.ci.code.CodeCacheProvider;
import jdk.vm.ci.code.RegisterValue;
@@ -108,6 +111,17 @@ public void initialize(HotSpotProviders providers, OptionValues options) {
linkSnippetStubs(providers, options, AMD64HotspotIntrinsicStubsGen::new, AMD64ArrayEqualsWithMaskForeignCalls.STUBS);
linkSnippetStubs(providers, options, AMD64HotspotIntrinsicStubsGen::new, AMD64CalcStringAttributesForeignCalls.STUBS);
+ if (AMD64GraphBuilderPlugins.supportsAESPlugins((AMD64) target.arch)) {
+ for (ForeignCallDescriptor stub : CryptoForeignCalls.AES_STUBS) {
+ link(new IntrinsicStubsGen(options, providers, registerStubCall(stub.getSignature(), LEAF, NOT_REEXECUTABLE, COMPUTES_REGISTERS_KILLED, stub.getKilledLocations())));
+ }
+ }
+
+ if (AMD64GraphBuilderPlugins.supportsGHASHPlugins((AMD64) target.arch)) {
+ link(new IntrinsicStubsGen(options, providers, registerStubCall(CryptoForeignCalls.STUB_GHASH_PROCESS_BLOCKS.getSignature(),
+ LEAF, NOT_REEXECUTABLE, COMPUTES_REGISTERS_KILLED, CryptoForeignCalls.STUB_GHASH_PROCESS_BLOCKS.getKilledLocations())));
+ }
+
super.initialize(providers, options);
}
diff --git a/compiler/src/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/HotSpotGHASHTest.java b/compiler/src/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/HotSpotGHASHTest.java
new file mode 100644
index 000000000000..7bf23fca0c25
--- /dev/null
+++ b/compiler/src/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/HotSpotGHASHTest.java
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.graalvm.compiler.hotspot.test;
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.nio.ByteBuffer;
+
+import org.graalvm.compiler.api.test.Graal;
+import org.graalvm.compiler.hotspot.meta.UnimplementedGraalIntrinsics;
+import org.graalvm.compiler.runtime.RuntimeProvider;
+import org.graalvm.compiler.test.AddExports;
+import org.junit.Assume;
+import org.junit.AssumptionViolatedException;
+import org.junit.Before;
+import org.junit.Test;
+
+import jdk.vm.ci.code.Architecture;
+import jdk.vm.ci.code.InstalledCode;
+
+@AddExports("java.base/com.sun.crypto.provider")
+public class HotSpotGHASHTest extends HotSpotGraalCompilerTest {
+
+ private Class<?> classGHASH;
+ private Constructor<?> ghashConstructor;
+ private Method methodUpdate;
+ private Method methodDigest;
+ private Field fieldState;
+
+ @Before
+ public void init() {
+ Architecture arch = Graal.getRequiredCapability(RuntimeProvider.class).getHostBackend().getTarget().arch;
+ Assume.assumeTrue(UnimplementedGraalIntrinsics.supportsGHASHPlugins(arch));
+ try {
+ classGHASH = Class.forName("com.sun.crypto.provider.GHASH");
+ ghashConstructor = classGHASH.getDeclaredConstructor(byte[].class);
+ ghashConstructor.setAccessible(true);
+ methodUpdate = classGHASH.getDeclaredMethod("update", byte[].class, int.class, int.class);
+ methodUpdate.setAccessible(true);
+ methodDigest = classGHASH.getDeclaredMethod("digest");
+ methodDigest.setAccessible(true);
+ fieldState = classGHASH.getDeclaredField("state");
+ fieldState.setAccessible(true);
+ } catch (ClassNotFoundException | NoSuchMethodException | NoSuchFieldException e) {
+ throw new AssumptionViolatedException(e.getMessage());
+ }
+ }
+
+ private static final String HEX_DIGITS = "0123456789abcdef";
+
+ private static byte[] bytes(String hex) {
+ StringBuilder sb = new StringBuilder(hex);
+ while ((sb.length() % 32) != 0) {
+ sb.append('0');
+ }
+ String newHex = sb.toString();
+
+ byte[] result = new byte[newHex.length() / 2];
+ for (int i = 0; i < result.length; ++i) {
+ int a = HEX_DIGITS.indexOf(newHex.charAt(2 * i));
+ int b = HEX_DIGITS.indexOf(newHex.charAt(2 * i + 1));
+ if ((a | b) < 0) {
+ if (a < 0) {
+ throw new AssertionError("bad character " + (int) newHex.charAt(2 * i));
+ }
+ throw new AssertionError("bad character " + (int) newHex.charAt(2 * i + 1));
+ }
+ result[i] = (byte) ((a << 4) | b);
+ }
+ return result;
+ }
+
+ private static byte[] bytes(long l0, long l1) {
+ return ByteBuffer.allocate(16).putLong(l0).putLong(l1).array();
+ }
+
+ private Result ghash(Object ghash, long[] initState, byte[]... inputs) {
+ try {
+ long[] state = (long[]) fieldState.get(ghash);
+ System.arraycopy(initState, 0, state, 0, 2);
+ for (byte[] input : inputs) {
+ methodUpdate.invoke(ghash, input, 0, input.length);
+ }
+ return new Result(methodDigest.invoke(ghash), null);
+ } catch (Exception e) {
+ return new Result(null, e);
+ }
+ }
+
+ private void testMultipleUpdateHelper(Object ghash, String strA, String strC, String result) {
+ long[] state = new long[]{0, 0};
+ byte[][] inputs = new byte[][]{bytes(strA), bytes(strC), bytes(strA.length() * 4, strC.length() * 4)};
+ assertTrue(result.length() == 32);
+ Result expected = new Result(bytes(result), null);
+ InstalledCode intrinsic = compileAndInstallSubstitution(classGHASH, "processBlocks");
+ Result actual = ghash(ghash, state, inputs);
+ assertEquals(expected, actual);
+ intrinsic.invalidate();
+ }
+
+ @Test
+ public void testMultipleUpdate() throws InvocationTargetException, InstantiationException, IllegalAccessException {
+ Object ghash = ghashConstructor.newInstance(bytes("66e94bd4ef8a2c3b884cfa59ca342b2e"));
+ testMultipleUpdateHelper(ghash, "", "", "00000000000000000000000000000000");
+ testMultipleUpdateHelper(ghash, "", "0388dace60b6a392f328c2b971b2fe78", "f38cbb1ad69223dcc3457ae5b6b0f885");
+
+ ghash = ghashConstructor.newInstance(bytes("b83b533708bf535d0aa6e52980d53b78"));
+ testMultipleUpdateHelper(ghash,
+ "",
+ "42831ec2217774244b7221b784d0d49c" + "e3aa212f2c02a4e035c17e2329aca12e" + "21d514b25466931c7d8f6a5aac84aa05" + "1ba30b396a0aac973d58e091473f5985",
+ "7f1b32b81b820d02614f8895ac1d4eac");
+ testMultipleUpdateHelper(ghash, "feedfacedeadbeeffeedfacedeadbeef" + "abaddad2",
+ "42831ec2217774244b7221b784d0d49c" + "e3aa212f2c02a4e035c17e2329aca12e" + "21d514b25466931c7d8f6a5aac84aa05" + "1ba30b396a0aac973d58e091",
+ "698e57f70e6ecc7fd9463b7260a9ae5f");
+ testMultipleUpdateHelper(ghash,
+ "feedfacedeadbeeffeedfacedeadbeef" + "abaddad2",
+ "61353b4c2806934a777ff51fa22a4755" + "699b2a714fcdc6f83766e5f97b6c7423" + "73806900e49f24b22b097544d4896b42" + "4989b5e1ebac0f07c23f4598",
+ "df586bb4c249b92cb6922877e444d37b");
+ }
+
+ private Result ghash(Object ghash, long[] initState, byte[] input, int inOff, int inLen) {
+ try {
+ long[] state = (long[]) fieldState.get(ghash);
+ System.arraycopy(initState, 0, state, 0, 2);
+ methodUpdate.invoke(ghash, input, inOff, inLen);
+ return new Result(methodDigest.invoke(ghash), null);
+ } catch (Exception e) {
+ return new Result(null, e);
+ }
+ }
+
+ private void testGHASH(Object ghash, long[] initState, byte[] input, int inOff, int inLen) {
+ Result expected = ghash(ghash, initState, input, inOff, inLen);
+ InstalledCode intrinsic = compileAndInstallSubstitution(classGHASH, "processBlocks");
+ Result actual = ghash(ghash, initState, input, inOff, inLen);
+ assertEquals(expected, actual);
+ intrinsic.invalidate();
+ }
+
+ @Test
+ public void testOffset() throws InvocationTargetException, InstantiationException, IllegalAccessException {
+ Object ghash = ghashConstructor.newInstance(bytes(-2549203865593441186L, -7934336365809252297L));
+
+ byte[] input = new byte[]{23, 3, 3, 0, 65, 112, -106, -54, 49, -74, -104, -65, -27, 85, 53, 64, 68, 112, -1, -91, 65, -93, -102, 126, 106, 24, -38, 10, 11, 110, -85, -123, -99, 121, 1, -100,
+ 6, -52, 17, -46, 50, -75, 69, 11, -119, -109, 60, -69, -125, -83, 79, 93, -88, 24, -28, 111, 39, -105, -13, -14, -5, -5, 65, 57, 6, -112, -96, 75, 28, 42, 64, 95, -5, -40, -64,
+ -83, -6, -30, -42, 108, 64, 3, -48, 62, 100, 89, 108, -39, 96, 86, -15, -11, 115, -96, -96, 122, 9, -102, 63, 9, 4, 88, -106, -77, 91, -54, 98, 22, -91, 70, 75, 23, -93, -87,
+ 107, -96, 32, -59, 5, -70, 61, -80, 76, -113, -115, -118, 36, -119, 32, -4, 14, 83, 18, -19, 17, 19, 57, -29, -40, 94, 13, -112, 103, 102, -96, 9, -81, -10, 91, 19, 2, 41, 108,
+ -95, 44, -98, 47, -60, 97, 27, 39, -61, 117, 42, -96, -45, 75, 115, -87, -85, -39, 14, -75, -111, -102, 76, -58, -35, -126, -122, -8, -55, 81, 56, -40, -16, 84, -93, 58, -44,
+ -60, 56, -17, -96, -83, -71, 86, -59, 111, -43, -7, 84, -58, -18, -109, -22, 6, -99, -92, -33, 9, 98, 8, -2, 47, -102, 53, 124, -85, 33, 60, -108, -102, -88, -33, 50, 96, -115,
+ 14, 46, 36, 88, -61, -118, 72, 57, 13, 27, 40, 93, 44, 110, 114, -83, 126, -21, 113, -15, -16, -103, -51, 118, 12, -9, -121, -108, 19, 5, 20, -122, -29, 35, 31, -50, -81, 85,
+ 57, -82, 25, 78, -24, -102, 74, -97, 107, -22, -92, 104, -76, 77, 37, -49, -114, -100, 122, -80, 79, -48, -119, 67, 72, 88, -12, 103, 107, 5, -14, -1, 56, -66, -102, 15, -72,
+ 41, 41, -74, -9, -56, 12, -68, -120, 43, -44, -85, -45, 79, -84, -58, -81, 97, 10, 2, 60, 1, -103, -10, -98, 123, 6, -65, 17, -46, -58, -41, 103, -24, -119, -89, -93, -115, -3,
+ -55, 38, -119, -88, 83, -36, 29, 28, -66, -121, 9, -32, -7, 112, 19, -58, -2, -119, -20, -9, 25, 36, -120, -10, -75, 80, 34, -29, 126, -105, -37, -28, 57, 66, 127, 118, 12, 53,
+ -9, -31, -33, 7, -82, 80, -60, -10, -17, -17, 94, 63, 46, 77, 71, 8, 85, -113, -33, -16, -68, 37, 64, -21, -91, 116, -125, -41, -43, 1, -89, 6, -53, -105, 47, -5, 59, 71, -115,
+ 108, 30, 125, 16, 52, 7, 87, -29, 111, 126, -42, 48, 114, 80, 54, 85, -45, 52, 37, -63, -59, 81, 55, 83, 67, -11, 68, -57, 91, -38, -40, 113, -25, 89, 86, -44, 53, -84, -48,
+ -120, -38, 21, -29, 103, -53, 32, -122, -32, -11, 20, 55, -32, -91, 99, -98, -45, -5, -94, 107, 120, 66, 90, -64, -7, 103, 122, -33, 44, -91, -80, -1, -98, 99, -71, 120, 10,
+ -114, 43, 58, -11, -69, -55, 65, -17, -113, -37, -51, 39, -117, 60, 3, -76, 87, 90, -27, 85, -82, -6, 89, -40, 77, -14, -124, 29, -9, 122, -97, 119, -126, 84, 116, 28, -45,
+ -50, 74, 107, 8, 8, 101, -124, 5, 56, 4, -125, 100, -4, -100, -11, -65, -8, -110, -27, 0, -106, -37, 29, 91, 35, 80, 88, 64, 117, -128, -91, -117, 5, -36, -27, -108, 29, 3,
+ 115, 95, -69, -53, -20, -122, 39, -21, -29, -128, -58, -94, -78, -100, -4, -58, -12, 104, -96, -98, -9, 0, 64, -7, 72, -127, -86, 76, 57, -36, -86, 39, -100, -126, -71, 13,
+ 116, -106, -71, -6, 66, -67, -85, -90, 92, 99, -47, -101, 16, -52, -90, -1, 84, -112, -36, -112, 114, -3, -126, 29, -121, 68, -37, -118, 7, -91, -50, -33, 23, -113, 68, -66,
+ -27, -30, -20, -78, 8, 43, -27, -62, -74, 22, 1, -53, 28, 114, -8, 54, -14, 120, 118, -70, -112, -23, 19, -2, 21, 126, -44, 20, -43, 75, 27, -92, 2, -84, 48, 108, 101, 39, 35,
+ -93, 16, 62, -58, -20, -24, 44, -109, 110, 95, -68, 73, -82, -125, -99, 26, -88, 16, -48, -125, 44, -68, -122, 57, 111, 8, 0, 43, 107, 122, 78, 57, -22, -77, 83, 115, 107, -87,
+ 112, 91, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+ long[] state = new long[]{-2224758530180934284L, 2670573948063642579L};
+ testGHASH(ghash, state, input, 5, input.length - 5);
+ }
+}
diff --git a/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/HotSpotGraphBuilderPlugins.java b/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/HotSpotGraphBuilderPlugins.java
index b99bf245ee70..1bc097d3db50 100644
--- a/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/HotSpotGraphBuilderPlugins.java
+++ b/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/HotSpotGraphBuilderPlugins.java
@@ -35,7 +35,6 @@
import static org.graalvm.compiler.hotspot.HotSpotBackend.CRC_TABLE_LOCATION;
import static org.graalvm.compiler.hotspot.HotSpotBackend.ELECTRONIC_CODEBOOK_DECRYPT_AESCRYPT;
import static org.graalvm.compiler.hotspot.HotSpotBackend.ELECTRONIC_CODEBOOK_ENCRYPT_AESCRYPT;
-import static org.graalvm.compiler.hotspot.HotSpotBackend.GHASH_PROCESS_BLOCKS;
import static org.graalvm.compiler.hotspot.HotSpotBackend.UPDATE_BYTES_CRC32;
import static org.graalvm.compiler.hotspot.HotSpotBackend.UPDATE_BYTES_CRC32C;
import static org.graalvm.compiler.hotspot.meta.HotSpotGraphBuilderPlugins.CipherBlockChainingCryptPlugin.readAESCryptKArrayStart;
@@ -227,7 +226,6 @@ public void run() {
registerBigIntegerPlugins(invocationPlugins, config, replacements);
registerSHAPlugins(invocationPlugins, config, replacements);
registerMD5Plugins(invocationPlugins, config, replacements);
- registerGHASHPlugins(invocationPlugins, config, metaAccess, replacements);
registerBase64Plugins(invocationPlugins, config, metaAccess, replacements);
registerUnsafePlugins(invocationPlugins, config, replacements);
StandardGraphBuilderPlugins.registerInvocationPlugins(snippetReflection, invocationPlugins, replacements, true, false, true, graalRuntime.getHostProviders().getLowerer());
@@ -942,24 +940,6 @@ private static void registerMD5Plugins(InvocationPlugins plugins, GraalHotSpotVM
r.registerConditional(config.md5ImplCompress != 0L, new DigestInvocationPlugin(HotSpotBackend.MD5_IMPL_COMPRESS));
}
- private static void registerGHASHPlugins(InvocationPlugins plugins, GraalHotSpotVMConfig config, MetaAccessProvider metaAccess, Replacements replacements) {
- Registration r = new Registration(plugins, "com.sun.crypto.provider.GHASH", replacements);
- r.registerConditional(config.useGHASHIntrinsics(), new InvocationPlugin("processBlocks", byte[].class, int.class, int.class, long[].class, long[].class) {
- @Override
- public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Receiver receiver,
- ValueNode data, ValueNode inOffset, ValueNode blocks, ValueNode state, ValueNode hashSubkey) {
- int longArrayBaseOffset = metaAccess.getArrayBaseOffset(JavaKind.Long);
- int byteArrayBaseOffset = metaAccess.getArrayBaseOffset(JavaKind.Byte);
- ValueNode dataOffset = AddNode.create(ConstantNode.forInt(byteArrayBaseOffset), inOffset, NodeView.DEFAULT);
- ComputeObjectAddressNode dataAddress = b.add(new ComputeObjectAddressNode(data, dataOffset));
- ComputeObjectAddressNode stateAddress = b.add(new ComputeObjectAddressNode(state, ConstantNode.forInt(longArrayBaseOffset)));
- ComputeObjectAddressNode hashSubkeyAddress = b.add(new ComputeObjectAddressNode(hashSubkey, ConstantNode.forInt(longArrayBaseOffset)));
- b.add(new ForeignCallNode(GHASH_PROCESS_BLOCKS, stateAddress, hashSubkeyAddress, dataAddress, blocks));
- return true;
- }
- });
- }
-
private static void registerBase64Plugins(InvocationPlugins plugins, GraalHotSpotVMConfig config, MetaAccessProvider metaAccess, Replacements replacements) {
Registration r = new Registration(plugins, "java.util.Base64$Encoder", replacements);
r.registerConditional(config.base64EncodeBlock != 0L, new InvocationPlugin("encodeBlock", Receiver.class, byte[].class, int.class, int.class, byte[].class, int.class, boolean.class) {
diff --git a/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/HotSpotHostForeignCallsProvider.java b/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/HotSpotHostForeignCallsProvider.java
index b068e6c537a8..ba3069a5725f 100644
--- a/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/HotSpotHostForeignCallsProvider.java
+++ b/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/HotSpotHostForeignCallsProvider.java
@@ -38,7 +38,6 @@
import static org.graalvm.compiler.hotspot.HotSpotBackend.ELECTRONIC_CODEBOOK_DECRYPT_AESCRYPT;
import static org.graalvm.compiler.hotspot.HotSpotBackend.ELECTRONIC_CODEBOOK_ENCRYPT_AESCRYPT;
import static org.graalvm.compiler.hotspot.HotSpotBackend.EXCEPTION_HANDLER;
-import static org.graalvm.compiler.hotspot.HotSpotBackend.GHASH_PROCESS_BLOCKS;
import static org.graalvm.compiler.hotspot.HotSpotBackend.IC_MISS_HANDLER;
import static org.graalvm.compiler.hotspot.HotSpotBackend.MD5_IMPL_COMPRESS;
import static org.graalvm.compiler.hotspot.HotSpotBackend.MD5_IMPL_COMPRESS_MB;
@@ -66,7 +65,6 @@
import static org.graalvm.compiler.hotspot.HotSpotBackend.UPDATE_BYTES_CRC32;
import static org.graalvm.compiler.hotspot.HotSpotBackend.UPDATE_BYTES_CRC32C;
import static org.graalvm.compiler.hotspot.HotSpotBackend.VM_ERROR;
-import static org.graalvm.compiler.hotspot.HotSpotForeignCallLinkage.RegisterEffect.COMPUTES_REGISTERS_KILLED;
import static org.graalvm.compiler.hotspot.HotSpotForeignCallLinkage.RegisterEffect.DESTROYS_ALL_CALLER_SAVE_REGISTERS;
import static org.graalvm.compiler.hotspot.HotSpotHostBackend.DEOPT_BLOB_UNCOMMON_TRAP;
import static org.graalvm.compiler.hotspot.HotSpotHostBackend.DEOPT_BLOB_UNPACK;
@@ -119,7 +117,6 @@
import org.graalvm.compiler.hotspot.stubs.ExceptionHandlerStub;
import org.graalvm.compiler.hotspot.stubs.IllegalArgumentExceptionArgumentIsNotAnArrayStub;
import org.graalvm.compiler.hotspot.stubs.IntegerExactOverflowExceptionStub;
-import org.graalvm.compiler.hotspot.stubs.IntrinsicStubsGen;
import org.graalvm.compiler.hotspot.stubs.LongExactOverflowExceptionStub;
import org.graalvm.compiler.hotspot.stubs.NegativeArraySizeExceptionStub;
import org.graalvm.compiler.hotspot.stubs.NullPointerExceptionStub;
@@ -132,7 +129,6 @@
import org.graalvm.compiler.options.OptionValues;
import org.graalvm.compiler.replacements.SnippetTemplate;
import org.graalvm.compiler.replacements.arraycopy.ArrayCopyForeignCalls;
-import org.graalvm.compiler.replacements.nodes.CryptoForeignCalls;
import org.graalvm.compiler.word.Word;
import org.graalvm.compiler.word.WordTypes;
import org.graalvm.word.LocationIdentity;
@@ -530,9 +526,6 @@ public void initialize(HotSpotProviders providers, OptionValues options) {
if (c.sha3ImplCompressMultiBlock != 0L) {
registerForeignCall(SHA3_IMPL_COMPRESS_MB, c.sha3ImplCompressMultiBlock, NativeCall);
}
- if (c.useGHASHIntrinsics()) {
- registerForeignCall(GHASH_PROCESS_BLOCKS, c.ghashProcessBlocks, NativeCall);
- }
if (c.base64EncodeBlock != 0L) {
registerForeignCall(BASE64_ENCODE_BLOCK, c.base64EncodeBlock, NativeCall);
}
@@ -574,10 +567,6 @@ public void initialize(HotSpotProviders providers, OptionValues options) {
registerForeignCall(ELECTRONIC_CODEBOOK_DECRYPT_AESCRYPT, c.electronicCodeBookDecrypt, NativeCall);
}
- for (ForeignCallDescriptor stub : CryptoForeignCalls.STUBS) {
- link(new IntrinsicStubsGen(options, providers, registerStubCall(stub.getSignature(), LEAF, NOT_REEXECUTABLE, COMPUTES_REGISTERS_KILLED, stub.getKilledLocations())));
- }
-
if (c.useAESIntrinsics) {
try {
// These stubs do callee saving
diff --git a/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/UnimplementedGraalIntrinsics.java b/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/UnimplementedGraalIntrinsics.java
index 9b04482f7238..aa0dbd8db723 100644
--- a/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/UnimplementedGraalIntrinsics.java
+++ b/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/UnimplementedGraalIntrinsics.java
@@ -33,6 +33,8 @@
import org.graalvm.compiler.debug.GraalError;
import org.graalvm.compiler.hotspot.GraalHotSpotVMConfig;
import org.graalvm.compiler.nodes.graphbuilderconf.InvocationPlugin;
+import org.graalvm.compiler.replacements.aarch64.AArch64GraphBuilderPlugins;
+import org.graalvm.compiler.replacements.amd64.AMD64GraphBuilderPlugins;
import org.graalvm.compiler.serviceprovider.JavaVersionUtil;
import jdk.vm.ci.aarch64.AArch64;
@@ -213,7 +215,7 @@ public UnimplementedGraalIntrinsics(GraalHotSpotVMConfig config, Architecture ar
add(ignore,
"com/sun/crypto/provider/CounterMode.implCrypt([BII[BI)I");
}
- if (!config.useGHASHIntrinsics()) {
+ if (!supportsGHASHPlugins(arch)) {
add(ignore,
"com/sun/crypto/provider/GHASH.processBlocks([BII[J[J)V");
}
@@ -239,10 +241,14 @@ public UnimplementedGraalIntrinsics(GraalHotSpotVMConfig config, Architecture ar
}
// AES intrinsics
- if (!config.useAESIntrinsics) {
+ if (!supportsAESPlugins(arch)) {
add(ignore,
"com/sun/crypto/provider/AESCrypt.implDecryptBlock([BI[BI)V",
- "com/sun/crypto/provider/AESCrypt.implEncryptBlock([BI[BI)V",
+ "com/sun/crypto/provider/AESCrypt.implEncryptBlock([BI[BI)V");
+ }
+
+ if (!config.useAESIntrinsics) {
+ add(ignore,
"com/sun/crypto/provider/CipherBlockChaining.implDecrypt([BII[BI)I",
"com/sun/crypto/provider/CipherBlockChaining.implEncrypt([BII[BI)I");
}
@@ -493,4 +499,22 @@ public boolean isMissing(String method) {
public boolean isDocumented(String method) {
return isIgnored(method) || isImplementedInEnterprise(method) || isMissing(method) || isIgnored(method);
}
+
+ public static boolean supportsAESPlugins(Architecture arch) {
+ if (arch instanceof AMD64) {
+ return AMD64GraphBuilderPlugins.supportsAESPlugins((AMD64) arch);
+ } else if (arch instanceof AArch64) {
+ return AArch64GraphBuilderPlugins.supportsAESPlugins((AArch64) arch);
+ }
+ return false;
+ }
+
+ public static boolean supportsGHASHPlugins(Architecture arch) {
+ if (arch instanceof AMD64) {
+ return AMD64GraphBuilderPlugins.supportsGHASHPlugins((AMD64) arch);
+ } else if (arch instanceof AArch64) {
+ return AArch64GraphBuilderPlugins.supportsGHASHPlugins((AArch64) arch);
+ }
+ return false;
+ }
}
diff --git a/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/stubs/IntrinsicStubs.java b/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/stubs/IntrinsicStubs.java
index d80d3d50909a..0c9b687e6a04 100644
--- a/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/stubs/IntrinsicStubs.java
+++ b/compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/stubs/IntrinsicStubs.java
@@ -32,6 +32,7 @@
import org.graalvm.compiler.replacements.nodes.ArrayIndexOfNode;
import org.graalvm.compiler.replacements.nodes.ArrayRegionCompareToNode;
import org.graalvm.compiler.replacements.nodes.ArrayRegionEqualsNode;
+import org.graalvm.compiler.replacements.nodes.GHASHProcessBlocksNode;
import org.graalvm.compiler.replacements.nodes.VectorizedMismatchNode;
@GeneratedStubsHolder(targetVM = "hotspot", sources = {
@@ -43,6 +44,7 @@
ArrayCopyWithConversionsNode.class,
VectorizedMismatchNode.class,
AESNode.class,
+ GHASHProcessBlocksNode.class,
})
public final class IntrinsicStubs {
}
diff --git a/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESDecryptOp.java b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESDecryptOp.java
index b7367203a06d..5faf24c61d51 100644
--- a/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESDecryptOp.java
+++ b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESDecryptOp.java
@@ -83,7 +83,14 @@ public AArch64AESDecryptOp(Value fromValue, Value toValue, Value keyValue, int l
this.toValue = toValue;
this.keyValue = keyValue;
this.lengthOffset = lengthOffset;
- this.temps = new Value[]{v0.asValue(), v1.asValue(), v2.asValue(), v3.asValue(), v4.asValue(), v5.asValue()};
+ this.temps = new Value[]{
+ v0.asValue(),
+ v1.asValue(),
+ v2.asValue(),
+ v3.asValue(),
+ v4.asValue(),
+ v5.asValue(),
+ };
}
@Override
diff --git a/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESEncryptOp.java b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESEncryptOp.java
index 86e451ddff60..f034231a9476 100644
--- a/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESEncryptOp.java
+++ b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64AESEncryptOp.java
@@ -68,13 +68,13 @@
@StubPort(path = "src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp",
lineStart = 2562,
lineEnd = 2592,
- commit = "61e072d11c8e0cb5879bb733ed1fdd2144326bfd",
+ commit = "f91943c19fc0b060684a437d2c768461d54c088e",
sha1 = "350e5592f4df298c7ee648581bb1e8342edf9a05")
@StubPort(path = "src/hotspot/cpu/aarch64/macroAssembler_aarch64_aes.cpp",
lineStart = 112,
lineEnd = 283,
- commit = "61e072d11c8e0cb5879bb733ed1fdd2144326bfd",
- sha1 = "bb8410fff34e13647ce0411bc64de8fd279cfbff")
+ commit = "2fe0ce01485d7b84dc109d3d4f24bdd908c0e7cf",
+ sha1 = "0809579798e28fe7d2439e9ac5d5f8e23f1fcd21")
// @formatter:on
public final class AArch64AESEncryptOp extends AArch64LIRInstruction {
@@ -94,9 +94,24 @@ public AArch64AESEncryptOp(Value fromValue, Value toValue, Value keyValue, int l
this.toValue = toValue;
this.keyValue = keyValue;
this.lengthOffset = lengthOffset;
- this.temps = new Value[]{v0.asValue(), v17.asValue(), v18.asValue(), v19.asValue(), v20.asValue(), v21.asValue(),
- v22.asValue(), v23.asValue(), v24.asValue(), v25.asValue(), v26.asValue(), v27.asValue(),
- v28.asValue(), v29.asValue(), v30.asValue(), v31.asValue()};
+ this.temps = new Value[]{
+ v0.asValue(),
+ v17.asValue(),
+ v18.asValue(),
+ v19.asValue(),
+ v20.asValue(),
+ v21.asValue(),
+ v22.asValue(),
+ v23.asValue(),
+ v24.asValue(),
+ v25.asValue(),
+ v26.asValue(),
+ v27.asValue(),
+ v28.asValue(),
+ v29.asValue(),
+ v30.asValue(),
+ v31.asValue(),
+ };
}
@Override
@@ -110,18 +125,15 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
masm.ldr(32, keylen, AArch64Address.createImmediateAddress(32, IMMEDIATE_SIGNED_UNSCALED, key, lengthOffset));
aesencLoadkeys(masm, key, keylen);
- // Uses expanded key in v17..v31
- // Returns encrypted values in inputs.
- // If to != noreg, store value at to; likewise from
- // Preserves key, keylen
- // Increments from, to
- // Input data in v0, v1, ...
- // unrolls controls the number of times to unroll the generated function
- new AESKernelGenerator(masm, 1, from, to, keylen, 0, 17).unroll();
+ aesecbEncrypt(masm, from, to, keylen, v0, 1);
}
}
- private static void aesencLoadkeys(AArch64MacroAssembler masm, Register key, Register keylen) {
+ static Register asFloatRegister(Register base, int offset) {
+ return AArch64.simdRegisters.get(base.encoding + offset);
+ }
+
+ static void aesencLoadkeys(AArch64MacroAssembler masm, Register key, Register keylen) {
Label loadkeys44 = new Label();
Label loadkeys52 = new Label();
@@ -170,10 +182,10 @@ private static void aesencLoadkeys(AArch64MacroAssembler masm, Register key, Reg
* {@link #generate(int)}, {@link #length()}, and {@link #next()} to generate unrolled and
* interleaved functions.
*/
- public abstract static class KernelGenerator {
+ abstract static class KernelGenerator {
protected final int unrolls;
- public KernelGenerator(int unrolls) {
+ KernelGenerator(int unrolls) {
this.unrolls = unrolls;
}
@@ -199,26 +211,26 @@ public void unroll() {
}
/** An unrolled and interleaved generator for AES encryption. */
- public static class AESKernelGenerator extends KernelGenerator {
+ static final class AESKernelGenerator extends KernelGenerator {
private final AArch64MacroAssembler masm;
private final Register from;
private final Register to;
private final Register keylen;
- private final int data;
- private final int subkeys;
+ private final Register data;
+ private final Register subkeys;
private final boolean once;
private final Label rounds44;
private final Label rounds52;
- public AESKernelGenerator(AArch64MacroAssembler masm,
+ AESKernelGenerator(AArch64MacroAssembler masm,
int unrolls,
Register from,
Register to,
Register keylen,
- int data,
- int subkeys,
+ Register data,
+ Register subkeys,
boolean once) {
super(unrolls);
this.masm = masm;
@@ -232,13 +244,13 @@ public AESKernelGenerator(AArch64MacroAssembler masm,
this.rounds52 = new Label();
}
- public AESKernelGenerator(AArch64MacroAssembler masm,
+ AESKernelGenerator(AArch64MacroAssembler masm,
int unrolls,
Register from,
Register to,
Register keylen,
- int data,
- int subkeys) {
+ Register data,
+ Register subkeys) {
this(masm,
unrolls,
from,
@@ -249,13 +261,9 @@ public AESKernelGenerator(AArch64MacroAssembler masm,
true);
}
- private static Register getSimdRegister(int index) {
- return AArch64.simdRegisters.get(index);
- }
-
- private void aesRound(int input, int subkey) {
- masm.neon.aese(getSimdRegister(input), getSimdRegister(subkey));
- masm.neon.aesmc(getSimdRegister(input), getSimdRegister(input));
+ private void aesRound(Register input, Register subkey) {
+ masm.neon.aese(input, subkey);
+ masm.neon.aesmc(input, input);
}
@Override
@@ -264,7 +272,7 @@ public void generate(int index) {
case 0:
if (!from.equals(Register.None)) {
// get 16 bytes of input
- masm.fldr(128, getSimdRegister(data), AArch64Address.createBaseRegisterOnlyAddress(128, from));
+ masm.fldr(128, data, AArch64Address.createBaseRegisterOnlyAddress(128, from));
}
break;
case 1:
@@ -275,10 +283,10 @@ public void generate(int index) {
}
break;
case 2:
- aesRound(data, subkeys + 0);
+ aesRound(data, asFloatRegister(subkeys, 0));
break;
case 3:
- aesRound(data, subkeys + 1);
+ aesRound(data, asFloatRegister(subkeys, 1));
break;
case 4:
if (once) {
@@ -286,10 +294,10 @@ public void generate(int index) {
}
break;
case 5:
- aesRound(data, subkeys + 2);
+ aesRound(data, asFloatRegister(subkeys, 2));
break;
case 6:
- aesRound(data, subkeys + 3);
+ aesRound(data, asFloatRegister(subkeys, 3));
break;
case 7:
if (once) {
@@ -297,41 +305,41 @@ public void generate(int index) {
}
break;
case 8:
- aesRound(data, subkeys + 4);
+ aesRound(data, asFloatRegister(subkeys, 4));
break;
case 9:
- aesRound(data, subkeys + 5);
+ aesRound(data, asFloatRegister(subkeys, 5));
break;
case 10:
- aesRound(data, subkeys + 6);
+ aesRound(data, asFloatRegister(subkeys, 6));
break;
case 11:
- aesRound(data, subkeys + 7);
+ aesRound(data, asFloatRegister(subkeys, 7));
break;
case 12:
- aesRound(data, subkeys + 8);
+ aesRound(data, asFloatRegister(subkeys, 8));
break;
case 13:
- aesRound(data, subkeys + 9);
+ aesRound(data, asFloatRegister(subkeys, 9));
break;
case 14:
- aesRound(data, subkeys + 10);
+ aesRound(data, asFloatRegister(subkeys, 10));
break;
case 15:
- aesRound(data, subkeys + 11);
+ aesRound(data, asFloatRegister(subkeys, 11));
break;
case 16:
- aesRound(data, subkeys + 12);
+ aesRound(data, asFloatRegister(subkeys, 12));
break;
case 17:
- masm.neon.aese(getSimdRegister(data), getSimdRegister(subkeys + 13));
+ masm.neon.aese(data, asFloatRegister(subkeys, 13));
break;
case 18:
- masm.neon.eorVVV(ASIMDSize.FullReg, getSimdRegister(data), getSimdRegister(data), getSimdRegister(subkeys + 14));
+ masm.neon.eorVVV(ASIMDSize.FullReg, data, data, asFloatRegister(subkeys, 14));
break;
case 19:
if (!to.equals(Register.None)) {
- masm.fstr(128, getSimdRegister(data), AArch64Address.createBaseRegisterOnlyAddress(128, to));
+ masm.fstr(128, data, AArch64Address.createBaseRegisterOnlyAddress(128, to));
}
break;
default:
@@ -346,7 +354,7 @@ public KernelGenerator next() {
from,
to,
keylen,
- data + 1,
+ asFloatRegister(data, 1),
subkeys,
false);
}
@@ -356,4 +364,15 @@ public int length() {
return 20;
}
}
+
+ // Uses expanded key in v17..v31
+ // Returns encrypted values in inputs.
+ // If to != noreg, store value at to; likewise from
+ // Preserves key, keylen
+ // Increments from, to
+ // Input data in v0, v1, ...
+ // unrolls controls the number of times to unroll the generated function
+ static void aesecbEncrypt(AArch64MacroAssembler masm, Register from, Register to, Register keylen, Register data, int unrolls) {
+ new AESKernelGenerator(masm, unrolls, from, to, keylen, data, v17).unroll();
+ }
}
diff --git a/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64GHASHProcessBlocksOp.java b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64GHASHProcessBlocksOp.java
new file mode 100644
index 000000000000..c76c8774c669
--- /dev/null
+++ b/compiler/src/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64GHASHProcessBlocksOp.java
@@ -0,0 +1,779 @@
+/*
+ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.graalvm.compiler.lir.aarch64;
+
+import static jdk.vm.ci.aarch64.AArch64.v0;
+import static jdk.vm.ci.aarch64.AArch64.v1;
+import static jdk.vm.ci.aarch64.AArch64.v2;
+import static jdk.vm.ci.aarch64.AArch64.v24;
+import static jdk.vm.ci.aarch64.AArch64.v28;
+import static jdk.vm.ci.aarch64.AArch64.v29;
+import static jdk.vm.ci.aarch64.AArch64.v3;
+import static jdk.vm.ci.aarch64.AArch64.v30;
+import static jdk.vm.ci.aarch64.AArch64.v31;
+import static jdk.vm.ci.aarch64.AArch64.v4;
+import static jdk.vm.ci.aarch64.AArch64.v5;
+import static jdk.vm.ci.aarch64.AArch64.v6;
+import static jdk.vm.ci.aarch64.AArch64.v7;
+import static jdk.vm.ci.code.ValueUtil.asRegister;
+import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_PAIR_SIGNED_SCALED;
+import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED;
+import static org.graalvm.compiler.asm.aarch64.AArch64Address.AddressingMode.IMMEDIATE_SIGNED_UNSCALED;
+import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG;
+import static org.graalvm.compiler.lir.aarch64.AArch64AESEncryptOp.asFloatRegister;
+
+import java.util.Arrays;
+
+import org.graalvm.compiler.asm.Label;
+import org.graalvm.compiler.asm.aarch64.AArch64ASIMDAssembler.ASIMDSize;
+import org.graalvm.compiler.asm.aarch64.AArch64ASIMDAssembler.ElementSize;
+import org.graalvm.compiler.asm.aarch64.AArch64Address;
+import org.graalvm.compiler.asm.aarch64.AArch64Assembler.ConditionFlag;
+import org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler;
+import org.graalvm.compiler.debug.GraalError;
+import org.graalvm.compiler.lir.LIRInstructionClass;
+import org.graalvm.compiler.lir.StubPort;
+import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
+import org.graalvm.compiler.lir.gen.LIRGeneratorTool;
+
+import jdk.vm.ci.aarch64.AArch64;
+import jdk.vm.ci.aarch64.AArch64Kind;
+import jdk.vm.ci.code.Register;
+import jdk.vm.ci.meta.AllocatableValue;
+import jdk.vm.ci.meta.Value;
+
+// @formatter:off
+@StubPort(path = "src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp",
+ lineStart = 5831,
+ lineEnd = 5965,
+ commit = "f91943c19fc0b060684a437d2c768461d54c088e",
+ sha1 = "f11f84b57df21c9b49473f204e11efc0e6da53d0")
+@StubPort(path = "src/hotspot/cpu/aarch64/macroAssembler_aarch64_aes.cpp",
+ lineStart = 285,
+ lineEnd = 691,
+ commit = "2fe0ce01485d7b84dc109d3d4f24bdd908c0e7cf",
+ sha1 = "75163bb4c510e3fa9f2347c5017561493d893691")
+// @formatter:on
+public final class AArch64GHASHProcessBlocksOp extends AArch64LIRInstruction {
+
+ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(AArch64GHASHProcessBlocksOp.class);
+
+ private static final int REGISTER_STRIDE = 7;
+
+ @Alive({REG}) private Value stateValue;
+ @Alive({REG}) private Value htblValue;
+ @Alive({REG}) private Value originalDataValue;
+ @Alive({REG}) private Value originalBlocksValue;
+
+ @Temp({REG}) private Value dataValue;
+ @Temp({REG}) private Value blocksValue;
+
+ @Temp protected Value[] temps;
+
+ public AArch64GHASHProcessBlocksOp(LIRGeneratorTool tool, AllocatableValue stateValue, AllocatableValue htblValue, AllocatableValue originalDataValue, AllocatableValue originalBlocksValue) {
+ super(TYPE);
+
+ this.stateValue = stateValue;
+ this.htblValue = htblValue;
+ this.originalDataValue = originalDataValue;
+ this.originalBlocksValue = originalBlocksValue;
+
+ this.dataValue = tool.newVariable(originalDataValue.getValueKind());
+ this.blocksValue = tool.newVariable(originalBlocksValue.getValueKind());
+
+ this.temps = Arrays.stream(AArch64.simdRegisters.toArray()).map(Register::asValue).toArray(Value[]::new);
+ }
+
+ @Override
+ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
+ assert stateValue.getPlatformKind().equals(AArch64Kind.QWORD) : stateValue;
+ assert htblValue.getPlatformKind().equals(AArch64Kind.QWORD) : htblValue;
+ assert originalDataValue.getPlatformKind().equals(AArch64Kind.QWORD) : originalDataValue;
+ assert originalBlocksValue.getPlatformKind().equals(AArch64Kind.DWORD) : originalBlocksValue;
+
+ Label labelSmall = new Label();
+ Label labelDone = new Label();
+
+ Register state = asRegister(stateValue);
+ Register subkeyH = asRegister(htblValue);
+ Register originalData = asRegister(originalDataValue);
+ Register originalBlocks = asRegister(originalBlocksValue);
+
+ Register data = asRegister(dataValue);
+ Register blocks = asRegister(blocksValue);
+
+ masm.mov(64, data, originalData);
+ masm.mov(32, blocks, originalBlocks);
+
+ masm.compare(32, blocks, 8);
+ masm.branchConditionally(ConditionFlag.LT, labelSmall);
+
+ // No need to save/restore states as we already mark all SIMD registers as killed.
+ // masm.sub(64, sp, sp, 4 * 16);
+ // masm.neon.st1MultipleVVVV(ASIMDSize.FullReg, ElementSize.Byte, v12, v13, v14, v15,
+ // AArch64Address.createBaseRegisterOnlyAddress(AArch64Address.ANY_SIZE, sp));
+ // masm.sub(64, sp, sp, 4 * 16);
+ // masm.neon.st1MultipleVVVV(ASIMDSize.FullReg, ElementSize.Byte, v8, v9, v10, v11,
+ // AArch64Address.createBaseRegisterOnlyAddress(AArch64Address.ANY_SIZE, sp));
+
+ ghashProcessBlocksWide(masm, state, subkeyH, data, blocks, 4);
+
+ // masm.neon.ld1MultipleVVVV(ASIMDSize.FullReg, ElementSize.Byte, v8, v9, v10, v11,
+ // AArch64Address.createStructureImmediatePostIndexAddress(ASIMDInstruction.LD1_MULTIPLE_4R,
+ // ASIMDSize.FullReg, ElementSize.Byte, sp, 64));
+ // masm.neon.ld1MultipleVVVV(ASIMDSize.FullReg, ElementSize.Byte, v12, v13, v14, v15,
+ // AArch64Address.createStructureImmediatePostIndexAddress(ASIMDInstruction.LD1_MULTIPLE_4R,
+ // ASIMDSize.FullReg, ElementSize.Byte, sp, 64));
+
+ masm.compare(32, blocks, 0);
+ masm.branchConditionally(ConditionFlag.LE, labelDone);
+
+ masm.bind(labelSmall);
+ generateGhashProcessBlocks(masm, state, subkeyH, data, blocks);
+ masm.bind(labelDone);
+ }
+
+ private static void generateGhashProcessBlocks(AArch64MacroAssembler masm,
+ Register state,
+ Register subkeyH,
+ Register data,
+ Register blocks) {
+ // Bafflingly, GCM uses little-endian for the byte order, but
+ // big-endian for the bit order. For example, the polynomial 1 is
+ // represented as the 16-byte string 80 00 00 00 | 12 bytes of 00.
+ //
+ // So, we must either reverse the bytes in each word and do
+ // everything big-endian or reverse the bits in each byte and do
+ // it little-endian. On AArch64 it's more idiomatic to reverse
+ // the bits in each byte (we have an instruction, RBIT, to do
+ // that) and keep the data in little-endian bit order through the
+ // calculation, bit-reversing the inputs and outputs.
+ Register vzr = v30;
+ masm.neon.eorVVV(ASIMDSize.FullReg, vzr, vzr, vzr); // zero register
+ // The field polynomial
+ try (AArch64MacroAssembler.ScratchRegister sc = masm.getScratchRegister()) {
+ Register scratch = sc.getRegister();
+ masm.mov(scratch, 0x00000087L);
+ masm.neon.dupVG(ASIMDSize.FullReg, ElementSize.DoubleWord, v24, scratch);
+ }
+
+ masm.fldr(128, v0, AArch64Address.createBaseRegisterOnlyAddress(128, state));
+ masm.fldr(128, v1, AArch64Address.createBaseRegisterOnlyAddress(128, subkeyH));
+
+ // Bit-reverse words in state and subkeyH
+ masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v0, v0);
+ masm.neon.rbitVV(ASIMDSize.FullReg, v0, v0);
+ masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v1, v1);
+ masm.neon.rbitVV(ASIMDSize.FullReg, v1, v1);
+
+ // long-swap subkeyH into v1
+ masm.neon.extVVV(ASIMDSize.FullReg, v4, v1, v1, 0x08);
+ // xor subkeyH into subkeyL (Karatsuba: (A1+A0))
+ masm.neon.eorVVV(ASIMDSize.FullReg, v4, v4, v1);
+
+ Label labelGHASHLoop = new Label();
+ masm.bind(labelGHASHLoop);
+
+ // Load the data, bit reversing each byte
+ masm.fldr(128, v2, AArch64Address.createImmediateAddress(128, IMMEDIATE_POST_INDEXED, data, 0x10));
+ masm.neon.rbitVV(ASIMDSize.FullReg, v2, v2);
+ // bit-swapped data ^ bit-swapped state
+ masm.neon.eorVVV(ASIMDSize.FullReg, v2, v0, v2);
+
+ // Multiply state in v2 by subkey in v1
+ ghashMultiply(masm,
+ /* resultLo */v5,
+ /* resultHi */v7,
+ /* a */v1,
+ /* b */v2,
+ /* a1XORa0 */v4,
+ /* temps */v6,
+ v3,
+ /* reuse/clobber b */v2);
+ // Reduce v7:v5 by the field polynomial
+ ghashReduce(masm,
+ /* result */v0,
+ /* lo */v5,
+ /* hi */v7,
+ /* p */v24,
+ vzr,
+ /* temp */v3);
+
+ masm.sub(32, blocks, blocks, 1);
+ masm.cbnz(32, blocks, labelGHASHLoop);
+
+ // The bit-reversed result is at this point in v0
+ masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v0, v0);
+ masm.neon.rbitVV(ASIMDSize.FullReg, v0, v0);
+ masm.fstr(128, v0, AArch64Address.createBaseRegisterOnlyAddress(128, state));
+ }
+
+ /**
+ * Interleaved GHASH processing. Clobbers all vector registers.
+ */
+ private static void ghashProcessBlocksWide(AArch64MacroAssembler masm,
+ Register state,
+ Register subkeyH,
+ Register data,
+ Register blocks,
+ int unrolls) {
+ Register a1XORa0 = v28;
+ Register hPrime = v29;
+ Register vzr = v30;
+ Register p = v31;
+ masm.neon.eorVVV(ASIMDSize.FullReg, vzr, vzr, vzr); // zero register
+
+ // The field polynomial
+ try (AArch64MacroAssembler.ScratchRegister sc = masm.getScratchRegister()) {
+ Register scratch = sc.getRegister();
+ masm.mov(scratch, 0x00000087L);
+ masm.neon.dupVG(ASIMDSize.FullReg, ElementSize.DoubleWord, p, scratch);
+ }
+
+ masm.fldr(128, v0, AArch64Address.createBaseRegisterOnlyAddress(128, state));
+ masm.fldr(128, hPrime, AArch64Address.createBaseRegisterOnlyAddress(128, subkeyH));
+
+ // Bit-reverse words in state and subkeyH
+ masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v0, v0);
+ masm.neon.rbitVV(ASIMDSize.FullReg, v0, v0);
+
+ masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, hPrime, hPrime);
+ masm.neon.rbitVV(ASIMDSize.FullReg, hPrime, hPrime);
+
+ // Powers of H -> hPrime
+
+ Label labelAlreadyCalculated = new Label();
+ Label labelDone = new Label();
+ // The first time around we'll have to calculate H**2, H**3, etc.
+ // Look at the largest power of H in the subkeyH array to see if
+ // it's already been calculated.
+ try (AArch64MacroAssembler.ScratchRegister sc1 = masm.getScratchRegister();
+ AArch64MacroAssembler.ScratchRegister sc2 = masm.getScratchRegister()) {
+ Register rscratch1 = sc1.getRegister();
+ Register rscratch2 = sc2.getRegister();
+ masm.ldp(64, rscratch1, rscratch2, AArch64Address.createImmediateAddress(64, IMMEDIATE_PAIR_SIGNED_SCALED, subkeyH, 16 * (unrolls - 1)));
+ masm.orr(64, rscratch1, rscratch1, rscratch2);
+ masm.cbnz(64, rscratch1, labelAlreadyCalculated);
+ }
+
+ // Start with H in v6 and hPrime
+ masm.neon.orrVVV(ASIMDSize.FullReg, v6, hPrime, hPrime);
+ for (int i = 1; i < unrolls; i++) {
+ // long-swap subkeyH into a1XORa0
+ masm.neon.extVVV(ASIMDSize.FullReg, a1XORa0, hPrime, hPrime, 0x08);
+ // xor subkeyH into subkeyL (Karatsuba: (A1+A0))
+ masm.neon.eorVVV(ASIMDSize.FullReg, a1XORa0, a1XORa0, hPrime);
+ ghashModmul(masm,
+ /* result */v6,
+ /* result_lo */v5,
+ /* result_hi */v4,
+ /* b */v6,
+ hPrime,
+ vzr,
+ a1XORa0,
+ p,
+ /* temps */v1,
+ v3,
+ v2);
+ masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v1, v6);
+ masm.neon.rbitVV(ASIMDSize.FullReg, v1, v1);
+ masm.fstr(128, v1, AArch64Address.createImmediateAddress(128, IMMEDIATE_SIGNED_UNSCALED, subkeyH, 16 * i));
+ }
+ masm.jmp(labelDone);
+ masm.bind(labelAlreadyCalculated);
+
+ // Load the largest power of H we need into v6.
+ masm.fldr(128, v6, AArch64Address.createImmediateAddress(128, IMMEDIATE_SIGNED_UNSCALED, subkeyH, 16 * (unrolls - 1)));
+ masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v6, v6);
+ masm.neon.rbitVV(ASIMDSize.FullReg, v6, v6);
+
+ masm.bind(labelDone);
+ // Move H ** unrolls into hPrime
+ masm.neon.orrVVV(ASIMDSize.FullReg, hPrime, v6, v6);
+
+ // hPrime contains (H ** 1, H ** 2, ... H ** unrolls)
+ // v0 contains the initial state. Clear the others.
+ for (int i = 1; i < unrolls; i++) {
+ int ofs = i * REGISTER_STRIDE;
+ // zero each state register
+ masm.neon.eorVVV(ASIMDSize.FullReg, asFloatRegister(v0, ofs), asFloatRegister(v0, ofs), asFloatRegister(v0, ofs));
+ }
+
+ // long-swap subkeyH into a1XORa0
+ masm.neon.extVVV(ASIMDSize.FullReg, a1XORa0, hPrime, hPrime, 0x08);
+ // xor subkeyH into subkeyL (Karatsuba: (A1+A0))
+ masm.neon.eorVVV(ASIMDSize.FullReg, a1XORa0, a1XORa0, hPrime);
+
+ // Load #unrolls blocks of data
+ for (int ofs = 0; ofs < unrolls * REGISTER_STRIDE; ofs += REGISTER_STRIDE) {
+ masm.fldr(128, asFloatRegister(v2, ofs), AArch64Address.createImmediateAddress(128, IMMEDIATE_POST_INDEXED, data, 0x10));
+ }
+
+ // Register assignments, replicated across 4 clones, v0 ... v23
+ //
+ // v0: input / output: current state, result of multiply/reduce
+ // v1: temp
+ // v2: input: one block of data (the ciphertext)
+ // also used as a temp once the data has been consumed
+ // v3: temp
+ // v4: output: high part of product
+ // v5: output: low part ...
+ // v6: unused
+ //
+ // Not replicated:
+ //
+ // v28: High part of H xor low part of H'
+ // v29: H' (hash subkey)
+ // v30: zero
+ // v31: Reduction polynomial of the Galois field
+
+ // Inner loop.
+ // Do the whole load/add/multiply/reduce over all our data except
+ // the last few rows.
+ Label labelGHASHLoop = new Label();
+ masm.bind(labelGHASHLoop);
+
+ // Prefetching doesn't help here. In fact, on Neoverse N1 it's worse.
+ // prfm(Address(data, 128), PLDL1KEEP);
+
+ // Xor data into current state
+ for (int ofs = 0; ofs < unrolls * REGISTER_STRIDE; ofs += REGISTER_STRIDE) {
+ // bit-swapped data ^ bit-swapped state
+ masm.neon.rbitVV(ASIMDSize.FullReg, asFloatRegister(v2, ofs), asFloatRegister(v2, ofs));
+ masm.neon.eorVVV(ASIMDSize.FullReg, asFloatRegister(v2, ofs), asFloatRegister(v0, ofs), asFloatRegister(v2, ofs));
+ }
+
+ // Generate fully-unrolled multiply-reduce in two stages.
+ new GHASHMultiplyGenerator(masm,
+ unrolls,
+ /* result_lo */v5,
+ /* result_hi */v4,
+ /* data */v2,
+ hPrime,
+ a1XORa0,
+ p,
+ vzr,
+ /* temps */v1,
+ v3,
+ /* reuse b */v2).unroll();
+
+ // NB: GHASHReduceGenerator also loads the next #unrolls blocks of
+ // data into v0, v0+ofs, the current state.
+ new GHASHReduceGenerator(masm,
+ unrolls,
+ /* result */v0,
+ /* lo */v5,
+ /* hi */v4,
+ p,
+ vzr,
+ data,
+ /* data */v2,
+ /* temp */v3,
+ true).unroll();
+
+ masm.sub(32, blocks, blocks, unrolls);
+ masm.compare(32, blocks, unrolls * 2);
+ masm.branchConditionally(ConditionFlag.GE, labelGHASHLoop);
+
+ // Merge the #unrolls states. Note that the data for the next
+ // iteration has already been loaded into v4, v4+ofs, etc...
+
+ // First, we multiply/reduce each clone by the appropriate power of H.
+ for (int i = 0; i < unrolls; i++) {
+ int ofs = i * REGISTER_STRIDE;
+ masm.fldr(128, hPrime, AArch64Address.createImmediateAddress(128, IMMEDIATE_SIGNED_UNSCALED, subkeyH, 16 * (unrolls - i - 1)));
+
+ masm.neon.rbitVV(ASIMDSize.FullReg, asFloatRegister(v2, ofs), asFloatRegister(v2, ofs));
+ // bit-swapped data ^ bit-swapped state
+ masm.neon.eorVVV(ASIMDSize.FullReg, asFloatRegister(v2, ofs), asFloatRegister(v0, ofs), asFloatRegister(v2, ofs));
+
+ masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, hPrime, hPrime);
+ masm.neon.rbitVV(ASIMDSize.FullReg, hPrime, hPrime);
+ // long-swap subkeyH into a1XORa0
+ masm.neon.extVVV(ASIMDSize.FullReg, a1XORa0, hPrime, hPrime, 0x08);
+ // xor subkeyH into subkeyL (Karatsuba: (A1+A0))
+ masm.neon.eorVVV(ASIMDSize.FullReg, a1XORa0, a1XORa0, hPrime);
+ ghashModmul(masm,
+ /* result */asFloatRegister(v0, ofs),
+ /* resultLo */asFloatRegister(v5, ofs),
+ /* resultHi */asFloatRegister(v4, ofs),
+ /* b */asFloatRegister(v2, ofs),
+ hPrime,
+ vzr,
+ a1XORa0,
+ p,
+ /* temps */asFloatRegister(v1, ofs),
+ asFloatRegister(v3, ofs),
+ /* reuse b */asFloatRegister(v2, ofs));
+ }
+
+ // Then we sum the results.
+ for (int i = 0; i < unrolls - 1; i++) {
+ int ofs = i * REGISTER_STRIDE;
+ masm.neon.eorVVV(ASIMDSize.FullReg, v0, v0, asFloatRegister(v0, ofs + REGISTER_STRIDE));
+ }
+
+ masm.sub(32, blocks, blocks, unrolls);
+
+ // And finally bit-reverse the state back to big endian.
+ masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v0, v0);
+ masm.neon.rbitVV(ASIMDSize.FullReg, v0, v0);
+ masm.fstr(128, v0, AArch64Address.createBaseRegisterOnlyAddress(128, state));
+ }
+
+ static final class GHASHMultiplyGenerator extends AArch64AESEncryptOp.KernelGenerator {
+
+ private final AArch64MacroAssembler masm;
+ private final Register resultLo;
+ private final Register resultHi;
+ private final Register b;
+ private final Register a;
+ private final Register vzr;
+ private final Register a1XORa0;
+ private final Register p;
+ private final Register tmp1;
+ private final Register tmp2;
+ private final Register tmp3;
+
+ GHASHMultiplyGenerator(AArch64MacroAssembler masm,
+ int unrolls,
+ Register resultLo,
+ Register resultHi,
+ Register b,
+ Register a,
+ Register a1XORa0,
+ Register p,
+ Register vzr,
+ Register tmp1,
+ Register tmp2,
+ Register tmp3) {
+ super(unrolls);
+ this.masm = masm;
+ this.resultLo = resultLo;
+ this.resultHi = resultHi;
+ this.b = b;
+ this.a = a;
+ this.a1XORa0 = a1XORa0;
+ this.p = p;
+ this.vzr = vzr;
+ this.tmp1 = tmp1;
+ this.tmp2 = tmp2;
+ this.tmp3 = tmp3;
+ }
+
+ @Override
+ public void generate(int index) {
+ // Karatsuba multiplication performs a 128*128 -> 256-bit
+ // multiplication in three 128-bit multiplications and a few
+ // additions.
+ //
+ // (C1:C0) = A1*B1, (D1:D0) = A0*B0, (E1:E0) = (A0+A1)(B0+B1)
+ // (A1:A0)(B1:B0) = C1:(C0+C1+D1+E1):(D1+C0+D0+E0):D0
+ //
+ // Inputs:
+ //
+ // A0 in a.d[0] (subkey)
+ // A1 in a.d[1]
+ // (A1+A0) in a1_xor_a0.d[0]
+ //
+ // B0 in b.d[0] (state)
+ // B1 in b.d[1]
+
+ switch (index) {
+ case 0:
+ masm.neon.extVVV(ASIMDSize.FullReg, tmp1, b, b, 0x08);
+ break;
+ case 1:
+ masm.neon.pmull2VVV(ElementSize.DoubleWord, resultHi, b, a); // A1*B1
+ break;
+ case 2:
+ masm.neon.eorVVV(ASIMDSize.FullReg, tmp1, tmp1, b); // (B1+B0)
+ break;
+ case 3:
+ masm.neon.pmullVVV(ElementSize.DoubleWord, resultLo, b, a); // A0*B0
+ break;
+ case 4:
+ masm.neon.pmullVVV(ElementSize.DoubleWord, tmp2, tmp1, a1XORa0); // (A1+A0)(B1+B0)
+ break;
+ case 5:
+ masm.neon.extVVV(ASIMDSize.FullReg, tmp1, resultLo, resultHi, 0x08);
+ break;
+ case 6:
+ masm.neon.eorVVV(ASIMDSize.FullReg, tmp3, resultHi, resultLo); // A1*B1+A0*B0
+ break;
+ case 7:
+ masm.neon.eorVVV(ASIMDSize.FullReg, tmp2, tmp2, tmp1);
+ break;
+ case 8:
+ masm.neon.eorVVV(ASIMDSize.FullReg, tmp2, tmp2, tmp3);
+ break;
+ // Register pair holds the result of carry-less multiplication
+ case 9:
+ masm.neon.insXX(ElementSize.DoubleWord, resultHi, 0, tmp2, 1);
+ break;
+ case 10:
+ masm.neon.insXX(ElementSize.DoubleWord, resultLo, 1, tmp2, 0);
+ break;
+ default:
+ throw GraalError.shouldNotReachHere();
+ }
+ }
+
+ @Override
+ public AArch64AESEncryptOp.KernelGenerator next() {
+ return new GHASHMultiplyGenerator(masm,
+ unrolls,
+ asFloatRegister(resultLo, REGISTER_STRIDE),
+ asFloatRegister(resultHi, REGISTER_STRIDE),
+ asFloatRegister(b, REGISTER_STRIDE),
+ a,
+ a1XORa0,
+ p,
+ vzr,
+ asFloatRegister(tmp1, REGISTER_STRIDE),
+ asFloatRegister(tmp2, REGISTER_STRIDE),
+ asFloatRegister(tmp3, REGISTER_STRIDE));
+ }
+
+ @Override
+ public int length() {
+ return 11;
+ }
+ }
+
+ /**
+ * Reduce the 128-bit product in hi:lo by the GCM field polynomial. The Register argument called
+ * data is optional: if it is a valid register, we interleave LD1 instructions with the
+ * reduction. This is to reduce latency next time around the loop.
+ */
+ static final class GHASHReduceGenerator extends AArch64AESEncryptOp.KernelGenerator {
+
+ private final AArch64MacroAssembler masm;
+ private final Register result;
+ private final Register lo;
+ private final Register hi;
+ private final Register p;
+ private final Register vzr;
+ private final Register dataPtr;
+ private final Register data;
+ private final Register t1;
+ private final boolean once;
+
+ GHASHReduceGenerator(AArch64MacroAssembler masm,
+ int unrolls,
+ Register result,
+ Register lo,
+ Register hi,
+ Register p,
+ Register vzr,
+ Register dataPtr,
+ Register data,
+ Register t1,
+ boolean once) {
+ super(unrolls);
+
+ this.masm = masm;
+ this.result = result;
+ this.lo = lo;
+ this.hi = hi;
+ this.p = p;
+ this.vzr = vzr;
+ this.dataPtr = dataPtr;
+ this.data = data;
+ this.t1 = t1;
+ this.once = once;
+ }
+
+ @Override
+ public void generate(int index) {
+ Register t0 = result;
+
+ switch (index) {
+ // The GCM field polynomial f is z^128 + p(z), where p =
+ // z^7+z^2+z+1.
+ //
+ // z^128 === -p(z) (mod (z^128 + p(z)))
+ //
+ // so, given that the product we're reducing is
+ // a == lo + hi * z^128
+ // substituting,
+ // === lo - hi * p(z) (mod (z^128 + p(z)))
+ //
+ // we reduce by multiplying hi by p(z) and subtracting the result
+ // from (i.e. XORing it with) lo. Because p has no nonzero high
+ // bits we can do this with two 64-bit multiplications, lo*p and
+ // hi*p.
+ case 0:
+ masm.neon.pmull2VVV(ElementSize.DoubleWord, t0, hi, p);
+ break;
+ case 1:
+ masm.neon.extVVV(ASIMDSize.FullReg, t1, t0, vzr, 8);
+ break;
+ case 2:
+ masm.neon.eorVVV(ASIMDSize.FullReg, hi, hi, t1);
+ break;
+ case 3:
+ masm.neon.extVVV(ASIMDSize.FullReg, t1, vzr, t0, 8);
+ break;
+ case 4:
+ masm.neon.eorVVV(ASIMDSize.FullReg, lo, lo, t1);
+ break;
+ case 5:
+ masm.neon.pmullVVV(ElementSize.DoubleWord, t0, hi, p);
+ break;
+ case 6:
+ masm.neon.eorVVV(ASIMDSize.FullReg, result, lo, t0);
+ break;
+ default:
+ throw GraalError.shouldNotReachHere();
+ }
+
+ // Sprinkle load instructions into the generated instructions
+ if (!Register.None.equals(data) && once) {
+ assert length() >= unrolls : "not enough room for interleaved loads";
+ if (index < unrolls) {
+ masm.fldr(128, asFloatRegister(data, index * REGISTER_STRIDE),
+ AArch64Address.createImmediateAddress(128, IMMEDIATE_POST_INDEXED, dataPtr, 0x10));
+ }
+ }
+ }
+
+ @Override
+ public AArch64AESEncryptOp.KernelGenerator next() {
+ return new GHASHReduceGenerator(masm,
+ unrolls,
+ asFloatRegister(result, REGISTER_STRIDE),
+ asFloatRegister(lo, REGISTER_STRIDE),
+ asFloatRegister(hi, REGISTER_STRIDE),
+ p,
+ vzr,
+ dataPtr,
+ data,
+ asFloatRegister(t1, REGISTER_STRIDE),
+ false);
+ }
+
+ @Override
+ public int length() {
+ return 7;
+ }
+ }
+
+ /**
+ * Perform a GHASH multiply/reduce on a single FloatRegister.
+ */
+ private static void ghashModmul(AArch64MacroAssembler masm,
+ Register result,
+ Register resultLo,
+ Register resultHi,
+ Register b,
+ Register a,
+ Register vzr,
+ Register a1XORa0,
+ Register p,
+ Register t1,
+ Register t2,
+ Register t3) {
+ ghashMultiply(masm, resultLo, resultHi, a, b, a1XORa0, t1, t2, t3);
+ ghashReduce(masm, result, resultLo, resultHi, p, vzr, t1);
+ }
+
+ private static void ghashReduce(AArch64MacroAssembler masm,
+ Register result,
+ Register lo,
+ Register hi,
+ Register p,
+ Register vzr,
+ Register t1) {
+ Register t0 = result;
+
+ // The GCM field polynomial f is z^128 + p(z), where p =
+ // z^7+z^2+z+1.
+ //
+ // z^128 === -p(z) (mod (z^128 + p(z)))
+ //
+ // so, given that the product we're reducing is
+ // a == lo + hi * z^128
+ // substituting,
+ // === lo - hi * p(z) (mod (z^128 + p(z)))
+ //
+ // we reduce by multiplying hi by p(z) and subtracting the result
+ // from (i.e. XORing it with) lo. Because p has no nonzero high
+ // bits we can do this with two 64-bit multiplications, lo*p and
+ // hi*p.
+
+ masm.neon.pmull2VVV(ElementSize.DoubleWord, t0, hi, p);
+ masm.neon.extVVV(ASIMDSize.FullReg, t1, t0, vzr, 8);
+ masm.neon.eorVVV(ASIMDSize.FullReg, hi, hi, t1);
+ masm.neon.extVVV(ASIMDSize.FullReg, t1, vzr, t0, 8);
+ masm.neon.eorVVV(ASIMDSize.FullReg, lo, lo, t1);
+ masm.neon.pmullVVV(ElementSize.DoubleWord, t0, hi, p);
+ masm.neon.eorVVV(ASIMDSize.FullReg, result, lo, t0);
+ }
+
+ /**
+ *
+ * ghashMultiply and ghashReduce are the non-unrolled versions of the GHASH function generators.
+ */
+ private static void ghashMultiply(AArch64MacroAssembler masm,
+ Register resultLo,
+ Register resultHi,
+ Register a,
+ Register b,
+ Register a1XORa0,
+ Register tmp1,
+ Register tmp2,
+ Register tmp3) {
+ // Karatsuba multiplication performs a 128*128 -> 256-bit
+ // multiplication in three 128-bit multiplications and a few
+ // additions.
+ //
+ // (C1:C0) = A1*B1, (D1:D0) = A0*B0, (E1:E0) = (A0+A1)(B0+B1)
+ // (A1:A0)(B1:B0) = C1:(C0+C1+D1+E1):(D1+C0+D0+E0):D0
+ //
+ // Inputs:
+ //
+ // A0 in a.d[0] (subkey)
+ // A1 in a.d[1]
+ // (A1+A0) in a1_xor_a0.d[0]
+ //
+ // B0 in b.d[0] (state)
+ // B1 in b.d[1]
+ masm.neon.extVVV(ASIMDSize.FullReg, tmp1, b, b, 0x08);
+ masm.neon.pmull2VVV(ElementSize.DoubleWord, resultHi, b, a); // A1*B1
+ masm.neon.eorVVV(ASIMDSize.FullReg, tmp1, tmp1, b); // (B1+B0)
+ masm.neon.pmullVVV(ElementSize.DoubleWord, resultLo, b, a); // A0*B0
+ masm.neon.pmullVVV(ElementSize.DoubleWord, tmp2, tmp1, a1XORa0); // (A1+A0)(B1+B0)
+
+ masm.neon.extVVV(ASIMDSize.FullReg, tmp1, resultLo, resultHi, 0x08);
+ masm.neon.eorVVV(ASIMDSize.FullReg, tmp3, resultHi, resultLo); // A1*B1+A0*B0
+ masm.neon.eorVVV(ASIMDSize.FullReg, tmp2, tmp2, tmp1);
+ masm.neon.eorVVV(ASIMDSize.FullReg, tmp2, tmp2, tmp3);
+
+ // Register pair holds the result of carry-less multiplication
+ masm.neon.insXX(ElementSize.DoubleWord, resultHi, 0, tmp2, 1);
+ masm.neon.insXX(ElementSize.DoubleWord, resultLo, 1, tmp2, 0);
+ }
+}
diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64ArrayCompareToOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64ArrayCompareToOp.java
index 90a46dff8f25..7c96611a5028 100644
--- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64ArrayCompareToOp.java
+++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64ArrayCompareToOp.java
@@ -268,24 +268,24 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
// and sub the size too
masm.sublAndJcc(cnt2, stride2x2, ConditionFlag.NotZero, labelCompareWideVectorsLoopAVX3, true);
- masm.vpxor(vec1, vec1, vec1);
+ masm.vpxor(vec1, vec1, vec1, AVXSize.YMM);
masm.jmpb(labelCompareWideTail);
}
masm.bind(labelCompareWideVectorsLoopAVX2);
if (strideA == strideB) {
masm.vmovdqu(vec1, new AMD64Address(str1, result, maxStride));
- masm.vpxor(vec1, vec1, new AMD64Address(str2, result, maxStride));
+ masm.vpxor(vec1, vec1, new AMD64Address(str2, result, maxStride), AVXSize.YMM);
} else {
masm.vpmovzxbw(vec1, new AMD64Address(str1, result, scale1));
- masm.vpxor(vec1, vec1, new AMD64Address(str2, result, scale2));
+ masm.vpxor(vec1, vec1, new AMD64Address(str2, result, scale2), AVXSize.YMM);
}
- masm.vptest(vec1, vec1);
+ masm.vptest(vec1, vec1, AVXSize.YMM);
masm.jcc(ConditionFlag.NotZero, labelVectorNotEqual);
masm.addq(result, elementsPerYMMVector);
masm.sublAndJcc(cnt2, elementsPerYMMVector, ConditionFlag.NotZero, labelCompareWideVectorsLoop, false);
// clean upper bits of YMM registers
- masm.vpxor(vec1, vec1, vec1);
+ masm.vpxor(vec1, vec1, vec1, AVXSize.YMM);
// compare wide vectors tail
masm.bind(labelCompareWideTail);
@@ -299,7 +299,7 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
// Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
masm.bind(labelVectorNotEqual);
// clean upper bits of YMM registers
- masm.vpxor(vec1, vec1, vec1);
+ masm.vpxor(vec1, vec1, vec1, AVXSize.YMM);
if (strideA == strideB) {
masm.leaq(str1, new AMD64Address(str1, result, maxStride));
masm.leaq(str2, new AMD64Address(str2, result, maxStride));
diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64EncodeArrayOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64EncodeArrayOp.java
index 0a5416a0b817..c5451c02045d 100644
--- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64EncodeArrayOp.java
+++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64EncodeArrayOp.java
@@ -36,6 +36,7 @@
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address;
+import org.graalvm.compiler.asm.amd64.AVXKind;
import org.graalvm.compiler.core.common.Stride;
import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag;
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
@@ -55,9 +56,9 @@
// @formatter:off
@StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86.cpp",
- lineStart = 5760,
- lineEnd = 5918,
- commit = "d00e7b92b4a6d33f5db6e2aedce5e058832a23de",
+ lineStart = 5793,
+ lineEnd = 5951,
+ commit = "926380d3b748fd591f45abc99c497abc62c52565",
sha1 = "28e9e817bee0afd9e5b698c5bff3ed519e09e410")
// @formatter:on
@Opcode("AMD64_ENCODE_ARRAY")
@@ -161,7 +162,7 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
masm.vmovdqu(vectorTemp3, new AMD64Address(src, len, Stride.S2, -64));
masm.vmovdqu(vectorTemp4, new AMD64Address(src, len, Stride.S2, -32));
masm.emit(VPOR, vectorTemp2, vectorTemp3, vectorTemp4, YMM);
- masm.vptest(vectorTemp2, vectorTemp1);
+ masm.vptest(vectorTemp2, vectorTemp1, AVXKind.AVXSize.YMM);
masm.jcc(ConditionFlag.NotZero, labelCopy32CharsExit, true);
masm.emit(VPACKUSWB, vectorTemp3, vectorTemp3, vectorTemp4, YMM);
masm.emit(VPERMQ, vectorTemp4, vectorTemp3, 0xD8, YMM);
@@ -184,7 +185,7 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
if (supportsAVX2AndYMM()) {
masm.vmovdqu(vectorTemp2, new AMD64Address(src, len, Stride.S2, -32));
- masm.vptest(vectorTemp2, vectorTemp1);
+ masm.vptest(vectorTemp2, vectorTemp1, AVXKind.AVXSize.YMM);
masm.jcc(ConditionFlag.NotZero, labelCopy16CharsExit);
masm.emit(VPACKUSWB, vectorTemp2, vectorTemp2, vectorTemp1, YMM);
masm.emit(VPERMQ, vectorTemp3, vectorTemp2, 0xD8, YMM);
diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64GHASHProcessBlocksOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64GHASHProcessBlocksOp.java
new file mode 100644
index 000000000000..9bdf8ae16880
--- /dev/null
+++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64GHASHProcessBlocksOp.java
@@ -0,0 +1,575 @@
+/*
+ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.graalvm.compiler.lir.amd64;
+
+import static jdk.vm.ci.amd64.AMD64.rax;
+import static jdk.vm.ci.amd64.AMD64.xmm0;
+import static jdk.vm.ci.amd64.AMD64.xmm1;
+import static jdk.vm.ci.amd64.AMD64.xmm10;
+import static jdk.vm.ci.amd64.AMD64.xmm11;
+import static jdk.vm.ci.amd64.AMD64.xmm13;
+import static jdk.vm.ci.amd64.AMD64.xmm14;
+import static jdk.vm.ci.amd64.AMD64.xmm15;
+import static jdk.vm.ci.amd64.AMD64.xmm2;
+import static jdk.vm.ci.amd64.AMD64.xmm3;
+import static jdk.vm.ci.amd64.AMD64.xmm4;
+import static jdk.vm.ci.amd64.AMD64.xmm5;
+import static jdk.vm.ci.amd64.AMD64.xmm6;
+import static jdk.vm.ci.amd64.AMD64.xmm7;
+import static jdk.vm.ci.amd64.AMD64.xmm8;
+import static jdk.vm.ci.amd64.AMD64.xmm9;
+import static jdk.vm.ci.code.ValueUtil.asRegister;
+import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG;
+import static org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper.pointerConstant;
+import static org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper.recordExternalAddress;
+
+import org.graalvm.compiler.asm.Label;
+import org.graalvm.compiler.asm.amd64.AMD64Address;
+import org.graalvm.compiler.asm.amd64.AMD64Assembler;
+import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag;
+import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
+import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
+import org.graalvm.compiler.lir.LIRInstructionClass;
+import org.graalvm.compiler.lir.StubPort;
+import org.graalvm.compiler.lir.asm.ArrayDataPointerConstant;
+import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
+import org.graalvm.compiler.lir.gen.LIRGeneratorTool;
+
+import jdk.vm.ci.amd64.AMD64;
+import jdk.vm.ci.code.Register;
+import jdk.vm.ci.meta.AllocatableValue;
+import jdk.vm.ci.meta.Value;
+
+// @formatter:off
+@StubPort(path = "src/hotspot/cpu/x86/stubGenerator_x86_64.cpp",
+ lineStart = 5281,
+ lineEnd = 5448,
+ commit = "27af0144ea57e86d9b81c2b328fad66e4a046f61",
+ sha1 = "dde6c3a58860fe4182bb03861710e6ed5b55cb51")
+@StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86_aes.cpp",
+ lineStart = 490,
+ lineEnd = 780,
+ commit = "27af0144ea57e86d9b81c2b328fad66e4a046f61",
+ sha1 = "2fae9aba4278b89fff3021a5e14450541d21b52f")
+// @formatter:on
+public final class AMD64GHASHProcessBlocksOp extends AMD64LIRInstruction {
+
+ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(AMD64GHASHProcessBlocksOp.class);
+
+ @Alive({REG}) private Value stateValue;
+ @Alive({REG}) private Value htblValue;
+ @Alive({REG}) private Value originalDataValue;
+ @Alive({REG}) private Value originalBlocksValue;
+
+ @Temp protected Value dataValue;
+ @Temp protected Value blocksValue;
+
+ @Temp protected Value[] temps;
+
+ public AMD64GHASHProcessBlocksOp(LIRGeneratorTool tool,
+ AllocatableValue stateValue,
+ AllocatableValue htblValue,
+ AllocatableValue originalDataValue,
+ AllocatableValue originalBlocksValue) {
+ super(TYPE);
+
+ this.stateValue = stateValue;
+ this.htblValue = htblValue;
+ this.originalDataValue = originalDataValue;
+ this.originalBlocksValue = originalBlocksValue;
+
+ this.dataValue = tool.newVariable(originalDataValue.getValueKind());
+ this.blocksValue = tool.newVariable(originalBlocksValue.getValueKind());
+
+ if (((AMD64) tool.target().arch).getFeatures().contains(AMD64.CPUFeature.AVX)) {
+ this.temps = new Value[]{
+ rax.asValue(),
+ xmm0.asValue(),
+ xmm1.asValue(),
+ xmm2.asValue(),
+ xmm3.asValue(),
+ xmm4.asValue(),
+ xmm5.asValue(),
+ xmm6.asValue(),
+ xmm7.asValue(),
+ xmm8.asValue(),
+ xmm9.asValue(),
+ xmm10.asValue(),
+ xmm11.asValue(),
+ xmm13.asValue(),
+ xmm14.asValue(),
+ xmm15.asValue(),
+ };
+ } else {
+ this.temps = new Value[]{
+ xmm0.asValue(),
+ xmm1.asValue(),
+ xmm2.asValue(),
+ xmm3.asValue(),
+ xmm4.asValue(),
+ xmm5.asValue(),
+ xmm6.asValue(),
+ xmm7.asValue(),
+ xmm8.asValue(),
+ xmm9.asValue(),
+ xmm10.asValue(),
+ };
+ }
+ }
+
+ private ArrayDataPointerConstant ghashLongSwapMask = pointerConstant(16, new int[]{
+ // @formatter:off
+ 0x0b0a0908, 0x0f0e0d0c, 0x03020100, 0x07060504
+ // @formatter:on
+ });
+
+ private ArrayDataPointerConstant ghashByteSwapMask = pointerConstant(16, new int[]{
+ // @formatter:off
+ 0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203
+ // @formatter:on
+ });
+
+ private ArrayDataPointerConstant ghashShuffleMask = pointerConstant(16, new int[]{
+ // @formatter:off
+ 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f
+ // @formatter:on
+ });
+
+ private ArrayDataPointerConstant ghashPolynomial = pointerConstant(16, new int[]{
+ // @formatter:off
+ 0x00000001, 0x00000000, 0x00000000, 0xc2000000
+ // @formatter:on
+ });
+
+ @Override
+ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
+ if (masm.supports(AMD64.CPUFeature.AVX)) {
+ Label labelBeginProcess = new Label();
+ Label labelBlock8Reduction = new Label();
+ Label labelOneBlkInit = new Label();
+ Label labelProcess1Block = new Label();
+ Label labelProcess8Blocks = new Label();
+ Label labelSaveState = new Label();
+ Label labelExitGHASH = new Label();
+
+ Register inputState = asRegister(stateValue);
+ Register htbl = asRegister(htblValue);
+ Register originalData = asRegister(originalDataValue);
+ Register originalBlocks = asRegister(originalBlocksValue);
+
+ Register inputData = asRegister(dataValue);
+ Register blocks = asRegister(blocksValue);
+
+ masm.movq(inputData, originalData);
+ masm.movq(blocks, originalBlocks);
+
+ // temporary variables to hold input data and input state
+ Register data = xmm1;
+ Register state = xmm0;
+ // temporary variables to hold intermediate results
+ Register tmp0 = xmm3;
+ Register tmp1 = xmm4;
+ Register tmp2 = xmm5;
+ Register tmp3 = xmm6;
+ // temporary variables to hold byte and long swap masks
+ Register bswapMask = xmm2;
+ Register lswapMask = xmm14;
+
+ masm.testqAndJcc(blocks, blocks, ConditionFlag.Zero, labelExitGHASH, false);
+
+ // Check if Hashtable (1*16) has been already generated
+ // For anything less than 8 blocks, we generate only the first power of H.
+ masm.movdqu(tmp2, new AMD64Address(htbl, 1 * 16));
+ masm.vptest(tmp2, tmp2, AVXSize.XMM);
+ masm.jcc(ConditionFlag.NotZero, labelBeginProcess);
+ generateHtblOneBlock(crb, masm, htbl);
+
+ masm.bind(labelBeginProcess);
+ masm.movdqu(lswapMask, recordExternalAddress(crb, ghashLongSwapMask));
+ masm.movdqu(state, new AMD64Address(inputState));
+ masm.vpshufb(state, state, lswapMask, AVXSize.XMM);
+
+ masm.cmplAndJcc(blocks, 8, ConditionFlag.Below, labelOneBlkInit, false);
+ // If we have 8 blocks or more data, then generate remaining powers of H
+ masm.movdqu(tmp2, new AMD64Address(htbl, 8 * 16));
+ masm.vptest(tmp2, tmp2, AVXSize.XMM);
+ masm.jcc(ConditionFlag.NotZero, labelProcess8Blocks);
+ generateHtblEightBlocks(masm, htbl);
+
+ // Do 8 multiplies followed by a reduction processing 8 blocks of data at a time
+ // Each block = 16 bytes.
+ masm.bind(labelProcess8Blocks);
+ masm.subl(blocks, 8);
+ masm.movdqu(bswapMask, recordExternalAddress(crb, ghashByteSwapMask));
+ masm.movdqu(data, new AMD64Address(inputData, 16 * 7));
+ masm.vpshufb(data, data, bswapMask, AVXSize.XMM);
+ // Loading 1*16 as calculated powers of H required starts at that location.
+ masm.movdqu(xmm15, new AMD64Address(htbl, 1 * 16));
+ // Perform carryless multiplication of (H*2, data block #7)
+ masm.vpclmulhqlqdq(tmp2, data, xmm15); // a0 * b1
+ masm.vpclmullqlqdq(tmp0, data, xmm15); // a0 * b0
+ masm.vpclmulhqhqdq(tmp1, data, xmm15); // a1 * b1
+ masm.vpclmullqhqdq(tmp3, data, xmm15); // a1 * b0
+ masm.vpxor(tmp2, tmp2, tmp3, AVXSize.XMM); // (a0 * b1) + (a1 * b0)
+
+ masm.movdqu(data, new AMD64Address(inputData, 16 * 6));
+ masm.vpshufb(data, data, bswapMask, AVXSize.XMM);
+ // Perform carryless multiplication of (H^2 * 2, data block #6)
+ schoolbookAAD(masm, 2, htbl, data, tmp0, tmp1, tmp2, tmp3);
+
+ masm.movdqu(data, new AMD64Address(inputData, 16 * 5));
+ masm.vpshufb(data, data, bswapMask, AVXSize.XMM);
+ // Perform carryless multiplication of (H^3 * 2, data block #5)
+ schoolbookAAD(masm, 3, htbl, data, tmp0, tmp1, tmp2, tmp3);
+ masm.movdqu(data, new AMD64Address(inputData, 16 * 4));
+ masm.vpshufb(data, data, bswapMask, AVXSize.XMM);
+ // Perform carryless multiplication of (H^4 * 2, data block #4)
+ schoolbookAAD(masm, 4, htbl, data, tmp0, tmp1, tmp2, tmp3);
+ masm.movdqu(data, new AMD64Address(inputData, 16 * 3));
+ masm.vpshufb(data, data, bswapMask, AVXSize.XMM);
+ // Perform carryless multiplication of (H^5 * 2, data block #3)
+ schoolbookAAD(masm, 5, htbl, data, tmp0, tmp1, tmp2, tmp3);
+ masm.movdqu(data, new AMD64Address(inputData, 16 * 2));
+ masm.vpshufb(data, data, bswapMask, AVXSize.XMM);
+ // Perform carryless multiplication of (H^6 * 2, data block #2)
+ schoolbookAAD(masm, 6, htbl, data, tmp0, tmp1, tmp2, tmp3);
+ masm.movdqu(data, new AMD64Address(inputData, 16 * 1));
+ masm.vpshufb(data, data, bswapMask, AVXSize.XMM);
+ // Perform carryless multiplication of (H^7 * 2, data block #1)
+ schoolbookAAD(masm, 7, htbl, data, tmp0, tmp1, tmp2, tmp3);
+ masm.movdqu(data, new AMD64Address(inputData, 16 * 0));
+ // xor data block#0 with input state before performing carry-less multiplication
+ masm.vpshufb(data, data, bswapMask, AVXSize.XMM);
+ masm.vpxor(data, data, state, AVXSize.XMM);
+ // Perform carryless multiplication of (H^8 * 2, data block #0)
+ schoolbookAAD(masm, 8, htbl, data, tmp0, tmp1, tmp2, tmp3);
+ masm.vpslldq(tmp3, tmp2, 8, AVXSize.XMM);
+ masm.vpsrldq(tmp2, tmp2, 8, AVXSize.XMM);
+ // tmp0, tmp1 contains aggregated results of the multiplication operation
+ masm.vpxor(tmp0, tmp0, tmp3, AVXSize.XMM);
+ masm.vpxor(tmp1, tmp1, tmp2, AVXSize.XMM);
+
+ // we have the 2 128-bit partially accumulated multiplication results in tmp0:tmp1
+ // with higher 128-bit in tmp1 and lower 128-bit in corresponding tmp0
+ // Follows the reduction technique mentioned in
+ // Shift-XOR reduction described in Gueron-Kounavis May 2010
+ masm.bind(labelBlock8Reduction);
+ // First Phase of the reduction
+ masm.vpslld(xmm8, tmp0, 31, AVXSize.XMM); // packed right shifting << 31
+ masm.vpslld(xmm9, tmp0, 30, AVXSize.XMM); // packed right shifting << 30
+ masm.vpslld(xmm10, tmp0, 25, AVXSize.XMM); // packed right shifting << 25
+ // xor the shifted versions
+ masm.vpxor(xmm8, xmm8, xmm10, AVXSize.XMM);
+ masm.vpxor(xmm8, xmm8, xmm9, AVXSize.XMM);
+
+ masm.vpslldq(xmm9, xmm8, 12, AVXSize.XMM);
+ masm.vpsrldq(xmm8, xmm8, 4, AVXSize.XMM);
+
+ masm.vpxor(tmp0, tmp0, xmm9, AVXSize.XMM); // first phase of reduction is complete
+ // second phase of the reduction
+ masm.vpsrld(xmm9, tmp0, 1, AVXSize.XMM); // packed left shifting >> 1
+ masm.vpsrld(xmm10, tmp0, 2, AVXSize.XMM); // packed left shifting >> 2
+ masm.vpsrld(tmp2, tmp0, 7, AVXSize.XMM); // packed left shifting >> 7
+ // xor the shifted versions
+ masm.vpxor(xmm9, xmm9, xmm10, AVXSize.XMM);
+ masm.vpxor(xmm9, xmm9, tmp2, AVXSize.XMM);
+ masm.vpxor(xmm9, xmm9, xmm8, AVXSize.XMM);
+ masm.vpxor(tmp0, xmm9, tmp0, AVXSize.XMM);
+ // Final result is in state
+ masm.vpxor(state, tmp0, tmp1, AVXSize.XMM);
+
+ masm.leaq(inputData, new AMD64Address(inputData, 16 * 8));
+ masm.cmplAndJcc(blocks, 8, AMD64Assembler.ConditionFlag.Below, labelOneBlkInit, false);
+ masm.jmp(labelProcess8Blocks);
+
+ // Since this is one block operation we will only use H * 2 i.e. the first power of H
+ masm.bind(labelOneBlkInit);
+ masm.movdqu(tmp0, new AMD64Address(htbl, 1 * 16));
+ masm.movdqu(bswapMask, recordExternalAddress(crb, ghashByteSwapMask));
+
+ // Do one (128 bit x 128 bit) carry-less multiplication at a time followed by a
+ // reduction.
+ masm.bind(labelProcess1Block);
+ masm.cmplAndJcc(blocks, 0, AMD64Assembler.ConditionFlag.Equal, labelSaveState, false);
+ masm.subl(blocks, 1);
+ masm.movdqu(data, new AMD64Address(inputData));
+ masm.vpshufb(data, data, bswapMask, AVXSize.XMM);
+ masm.vpxor(state, state, data, AVXSize.XMM);
+ // gfmul(H*2, state)
+ gfmul(masm, tmp0, state);
+ masm.addq(inputData, 16);
+ masm.jmp(labelProcess1Block);
+
+ masm.bind(labelSaveState);
+ masm.vpshufb(state, state, lswapMask, AVXSize.XMM);
+ masm.movdqu(new AMD64Address(inputState), state);
+
+ masm.bind(labelExitGHASH);
+ // zero out xmm registers used for Htbl storage
+ masm.vpxor(xmm0, xmm0, xmm0, AVXSize.XMM);
+ masm.vpxor(xmm1, xmm1, xmm1, AVXSize.XMM);
+ masm.vpxor(xmm3, xmm3, xmm3, AVXSize.XMM);
+ masm.vpxor(xmm15, xmm15, xmm15, AVXSize.XMM);
+ } else {
+ Label labelGHASHLoop = new Label();
+ Label labelExit = new Label();
+
+ Register state = asRegister(stateValue);
+ Register subkeyH = asRegister(htblValue);
+ Register originalData = asRegister(originalDataValue);
+ Register originalBlocks = asRegister(originalBlocksValue);
+
+ Register data = asRegister(dataValue);
+ Register blocks = asRegister(blocksValue);
+
+ masm.movq(data, originalData);
+ masm.movq(blocks, originalBlocks);
+
+ Register xmmTemp0 = xmm0;
+ Register xmmTemp1 = xmm1;
+ Register xmmTemp2 = xmm2;
+ Register xmmTemp3 = xmm3;
+ Register xmmTemp4 = xmm4;
+ Register xmmTemp5 = xmm5;
+ Register xmmTemp6 = xmm6;
+ Register xmmTemp7 = xmm7;
+ Register xmmTemp8 = xmm8;
+ Register xmmTemp9 = xmm9;
+ Register xmmTemp10 = xmm10;
+
+ masm.movdqu(xmmTemp10, recordExternalAddress(crb, ghashLongSwapMask));
+
+ masm.movdqu(xmmTemp0, new AMD64Address(state));
+ masm.pshufb(xmmTemp0, xmmTemp10);
+
+ masm.bind(labelGHASHLoop);
+ masm.movdqu(xmmTemp2, new AMD64Address(data));
+ masm.pshufb(xmmTemp2, recordExternalAddress(crb, ghashByteSwapMask));
+
+ masm.movdqu(xmmTemp1, new AMD64Address(subkeyH));
+ masm.pshufb(xmmTemp1, xmmTemp10);
+
+ masm.pxor(xmmTemp0, xmmTemp2);
+
+ // Multiply with the hash key
+ masm.movdqu(xmmTemp3, xmmTemp0);
+ masm.pclmulqdq(xmmTemp3, xmmTemp1, 0); // xmm3 holds a0*b0
+ masm.movdqu(xmmTemp4, xmmTemp0);
+ masm.pclmulqdq(xmmTemp4, xmmTemp1, 16); // xmm4 holds a0*b1
+
+ masm.movdqu(xmmTemp5, xmmTemp0);
+ masm.pclmulqdq(xmmTemp5, xmmTemp1, 1); // xmm5 holds a1*b0
+ masm.movdqu(xmmTemp6, xmmTemp0);
+ masm.pclmulqdq(xmmTemp6, xmmTemp1, 17); // xmm6 holds a1*b1
+
+ masm.pxor(xmmTemp4, xmmTemp5); // xmm4 holds a0*b1 + a1*b0
+
+ masm.movdqu(xmmTemp5, xmmTemp4); // move the contents of xmm4 to xmm5
+ masm.psrldq(xmmTemp4, 8); // shift by xmm4 64 bits to the right
+ masm.pslldq(xmmTemp5, 8); // shift by xmm5 64 bits to the left
+ // Register pair holds the result of the carry-less multiplication of xmm0
+ // by xmm1.
+ masm.pxor(xmmTemp3, xmmTemp5);
+ masm.pxor(xmmTemp6, xmmTemp4);
+
+ // We shift the result of the multiplication by one bit position
+ // to the left to cope for the fact that the bits are reversed.
+ masm.movdqu(xmmTemp7, xmmTemp3);
+ masm.movdqu(xmmTemp8, xmmTemp6);
+ masm.pslld(xmmTemp3, 1);
+ masm.pslld(xmmTemp6, 1);
+ masm.psrld(xmmTemp7, 31);
+ masm.psrld(xmmTemp8, 31);
+ masm.movdqu(xmmTemp9, xmmTemp7);
+ masm.pslldq(xmmTemp8, 4);
+ masm.pslldq(xmmTemp7, 4);
+ masm.psrldq(xmmTemp9, 12);
+ masm.por(xmmTemp3, xmmTemp7);
+ masm.por(xmmTemp6, xmmTemp8);
+ masm.por(xmmTemp6, xmmTemp9);
+
+ //
+ // First phase of the reduction
+ //
+ // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts
+ // independently.
+ masm.movdqu(xmmTemp7, xmmTemp3);
+ masm.movdqu(xmmTemp8, xmmTemp3);
+ masm.movdqu(xmmTemp9, xmmTemp3);
+ masm.pslld(xmmTemp7, 31); // packed right shift shifting << 31
+ masm.pslld(xmmTemp8, 30); // packed right shift shifting << 30
+ masm.pslld(xmmTemp9, 25); // packed right shift shifting << 25
+ masm.pxor(xmmTemp7, xmmTemp8); // xor the shifted versions
+ masm.pxor(xmmTemp7, xmmTemp9);
+ masm.movdqu(xmmTemp8, xmmTemp7);
+ masm.pslldq(xmmTemp7, 12);
+ masm.psrldq(xmmTemp8, 4);
+ masm.pxor(xmmTemp3, xmmTemp7); // first phase of the reduction complete
+
+ //
+ // Second phase of the reduction
+ //
+ // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these
+ // shift operations.
+ masm.movdqu(xmmTemp2, xmmTemp3);
+ masm.movdqu(xmmTemp4, xmmTemp3);
+ masm.movdqu(xmmTemp5, xmmTemp3);
+ masm.psrld(xmmTemp2, 1); // packed left shifting >> 1
+ masm.psrld(xmmTemp4, 2); // packed left shifting >> 2
+ masm.psrld(xmmTemp5, 7); // packed left shifting >> 7
+ masm.pxor(xmmTemp2, xmmTemp4); // xor the shifted versions
+ masm.pxor(xmmTemp2, xmmTemp5);
+ masm.pxor(xmmTemp2, xmmTemp8);
+ masm.pxor(xmmTemp3, xmmTemp2);
+ masm.pxor(xmmTemp6, xmmTemp3); // the result is in xmm6
+
+ masm.decqAndJcc(blocks, AMD64Assembler.ConditionFlag.Zero, labelExit, false);
+ masm.movdqu(xmmTemp0, xmmTemp6);
+ masm.addq(data, 16);
+ masm.jmp(labelGHASHLoop);
+
+ masm.bind(labelExit);
+ masm.pshufb(xmmTemp6, xmmTemp10); // Byte swap 16-byte result
+ masm.movdqu(new AMD64Address(state), xmmTemp6); // store the result
+ }
+ }
+
+ /**
+ * Multiply 128 x 128 bits, using 4 pclmulqdq operations.
+ */
+ private static void schoolbookAAD(AMD64MacroAssembler masm, int i, Register htbl, Register data, Register tmp0, Register tmp1, Register tmp2, Register tmp3) {
+ masm.movdqu(xmm15, new AMD64Address(htbl, i * 16));
+ masm.vpclmulhqlqdq(tmp3, data, xmm15); // 0x01
+ masm.vpxor(tmp2, tmp2, tmp3, AVXSize.XMM);
+ masm.vpclmullqlqdq(tmp3, data, xmm15); // 0x00
+ masm.vpxor(tmp0, tmp0, tmp3, AVXSize.XMM);
+ masm.vpclmulhqhqdq(tmp3, data, xmm15); // 0x11
+ masm.vpxor(tmp1, tmp1, tmp3, AVXSize.XMM);
+ masm.vpclmullqhqdq(tmp3, data, xmm15); // 0x10
+ masm.vpxor(tmp2, tmp2, tmp3, AVXSize.XMM);
+ }
+
+ /**
+ * Multiply two 128 bit numbers resulting in a 256 bit value Result of the multiplication
+ * followed by reduction stored in state.
+ */
+ private static void gfmul(AMD64MacroAssembler masm, Register tmp0, Register state) {
+ Register tmp1 = xmm4;
+ Register tmp2 = xmm5;
+ Register tmp3 = xmm6;
+ Register tmp4 = xmm7;
+
+ masm.vpclmullqlqdq(tmp1, state, tmp0); // 0x00 (a0 * b0)
+ masm.vpclmulhqhqdq(tmp4, state, tmp0); // 0x11 (a1 * b1)
+ masm.vpclmullqhqdq(tmp2, state, tmp0); // 0x10 (a1 * b0)
+ masm.vpclmulhqlqdq(tmp3, state, tmp0); // 0x01 (a0 * b1)
+
+ masm.vpxor(tmp2, tmp2, tmp3, AVXSize.XMM); // (a0 * b1) + (a1 * b0)
+
+ masm.vpslldq(tmp3, tmp2, 8, AVXSize.XMM);
+ masm.vpsrldq(tmp2, tmp2, 8, AVXSize.XMM);
+ masm.vpxor(tmp1, tmp1, tmp3, AVXSize.XMM); // tmp1 and tmp4 hold the result
+ masm.vpxor(tmp4, tmp4, tmp2, AVXSize.XMM); // of carryless multiplication
+ // Follows the reduction technique mentioned in
+ // Shift-XOR reduction described in Gueron-Kounavis May 2010
+
+ // First phase of reduction
+ masm.vpslld(xmm8, tmp1, 31, AVXSize.XMM); // packed right shift shifting << 31
+ masm.vpslld(xmm9, tmp1, 30, AVXSize.XMM); // packed right shift shifting << 30
+ masm.vpslld(xmm10, tmp1, 25, AVXSize.XMM); // packed right shift shifting << 25
+ // xor the shifted versions
+ masm.vpxor(xmm8, xmm8, xmm9, AVXSize.XMM);
+ masm.vpxor(xmm8, xmm8, xmm10, AVXSize.XMM);
+ masm.vpslldq(xmm9, xmm8, 12, AVXSize.XMM);
+ masm.vpsrldq(xmm8, xmm8, 4, AVXSize.XMM);
+ masm.vpxor(tmp1, tmp1, xmm9, AVXSize.XMM); // first phase of the reduction complete
+
+ // Second phase of the reduction
+ masm.vpsrld(xmm9, tmp1, 1, AVXSize.XMM); // packed left shifting >> 1
+ masm.vpsrld(xmm10, tmp1, 2, AVXSize.XMM); // packed left shifting >> 2
+ masm.vpsrld(xmm11, tmp1, 7, AVXSize.XMM); // packed left shifting >> 7
+ masm.vpxor(xmm9, xmm9, xmm10, AVXSize.XMM); // xor the shifted versions
+ masm.vpxor(xmm9, xmm9, xmm11, AVXSize.XMM);
+ masm.vpxor(xmm9, xmm9, xmm8, AVXSize.XMM);
+ masm.vpxor(tmp1, tmp1, xmm9, AVXSize.XMM);
+ masm.vpxor(state, tmp4, tmp1, AVXSize.XMM); // the result is in state
+ }
+
+ private void generateHtblOneBlock(CompilationResultBuilder crb, AMD64MacroAssembler masm, Register htbl) {
+ Register t = xmm13;
+
+ // load the original subkey hash
+ masm.movdqu(t, new AMD64Address(htbl));
+ // shuffle using long swap mask
+ masm.movdqu(xmm10, recordExternalAddress(crb, ghashLongSwapMask));
+ masm.vpshufb(t, t, xmm10, AVXSize.XMM);
+
+ // Compute H' = GFMUL(H, 2)
+ masm.vpsrld(xmm3, t, 7, AVXSize.XMM);
+ masm.movdqu(xmm4, recordExternalAddress(crb, ghashShuffleMask));
+ masm.vpshufb(xmm3, xmm3, xmm4, AVXSize.XMM);
+ masm.movl(rax, 0xff00);
+ masm.movdl(xmm4, rax);
+ masm.vpshufb(xmm4, xmm4, xmm3, AVXSize.XMM);
+ masm.movdqu(xmm5, recordExternalAddress(crb, ghashPolynomial));
+ masm.vpand(xmm5, xmm5, xmm4, AVXSize.XMM);
+ masm.vpsrld(xmm3, t, 31, AVXSize.XMM);
+ masm.vpslld(xmm4, t, 1, AVXSize.XMM);
+ masm.vpslldq(xmm3, xmm3, 4, AVXSize.XMM);
+ masm.vpxor(t, xmm4, xmm3, AVXSize.XMM); // t holds p(x) <<1 or H * 2
+
+ // Adding p(x)<<1 to xmm5 which holds the reduction polynomial
+ masm.vpxor(t, t, xmm5, AVXSize.XMM);
+ masm.movdqu(new AMD64Address(htbl, 1 * 16), t); // H * 2
+ }
+
+ /**
+ * This method takes the subkey after expansion as input and generates the remaining powers of
+ * subkey H. The power of H is used in reduction process for eight block ghash.
+ */
+ private static void generateHtblEightBlocks(AMD64MacroAssembler masm, Register htbl) {
+ Register t = xmm13;
+ Register tmp0 = xmm1;
+
+ masm.movdqu(t, new AMD64Address(htbl, 1 * 16));
+ masm.movdqu(tmp0, t);
+
+ // tmp0 and t hold H. Now we compute powers of H by using GFMUL(H, H)
+ gfmul(masm, tmp0, t);
+ masm.movdqu(new AMD64Address(htbl, 2 * 16), t); // H ^ 2 * 2
+ gfmul(masm, tmp0, t);
+ masm.movdqu(new AMD64Address(htbl, 3 * 16), t); // H ^ 3 * 2
+ gfmul(masm, tmp0, t);
+ masm.movdqu(new AMD64Address(htbl, 4 * 16), t); // H ^ 4 * 2
+ gfmul(masm, tmp0, t);
+ masm.movdqu(new AMD64Address(htbl, 5 * 16), t); // H ^ 5 * 2
+ gfmul(masm, tmp0, t);
+ masm.movdqu(new AMD64Address(htbl, 6 * 16), t); // H ^ 6 * 2
+ gfmul(masm, tmp0, t);
+ masm.movdqu(new AMD64Address(htbl, 7 * 16), t); // H ^ 7 * 2
+ gfmul(masm, tmp0, t);
+ masm.movdqu(new AMD64Address(htbl, 8 * 16), t); // H ^ 8 * 2
+ }
+}
diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64HasNegativesOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64HasNegativesOp.java
index 1c15e233c0bb..c076b45d275f 100644
--- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64HasNegativesOp.java
+++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64HasNegativesOp.java
@@ -37,6 +37,7 @@
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address;
+import org.graalvm.compiler.asm.amd64.AVXKind;
import org.graalvm.compiler.core.common.Stride;
import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag;
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
@@ -183,14 +184,14 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
masm.bind(labelCompareWideVectors);
masm.vmovdqu(vec1, new AMD64Address(ary1, len, Stride.S1));
- masm.vptest(vec1, vec2);
+ masm.vptest(vec1, vec2, AVXKind.AVXSize.YMM);
masm.jcc(ConditionFlag.NotZero, labelTrue);
masm.addqAndJcc(len, 32, ConditionFlag.NotZero, labelCompareWideVectors, false);
masm.testlAndJcc(result, result, ConditionFlag.Zero, labelFalse, false);
masm.vmovdqu(vec1, new AMD64Address(ary1, result, Stride.S1, -32));
- masm.vptest(vec1, vec2);
+ masm.vptest(vec1, vec2, AVXKind.AVXSize.YMM);
masm.jccb(ConditionFlag.NotZero, labelTrue);
masm.jmp(labelFalse);
diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathCosOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathCosOp.java
index 8783b8108264..9522140631df 100644
--- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathCosOp.java
+++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathCosOp.java
@@ -202,10 +202,15 @@
*/
// @formatter:off
@StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86_cos.cpp",
- lineStart = 0,
- lineEnd = 630,
- commit = "e58c12e61828485bfffbc9d1b865302b93a94158",
- sha1 = "cb83822ed974ba4181ff2d55869b301686e0c8c3")
+ lineStart = 34,
+ lineEnd = 612,
+ commit = "f3be6731d3fa4fb1b7fc42c5bcbe6a64a50eaf42",
+ sha1 = "9339dc67800971e1d45dd878394cb650a36ffb03")
+@StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86_constants.cpp",
+ lineStart = 29,
+ lineEnd = 236,
+ commit = "f3be6731d3fa4fb1b7fc42c5bcbe6a64a50eaf42",
+ sha1 = "f89c1da45b2e91cb114e68cbe20ea6fff3bae315")
// @formatter:on
public final class AMD64MathCosOp extends AMD64MathIntrinsicUnaryOp {
diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathExpOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathExpOp.java
index 6e234aa2b631..a90dfb21176d 100644
--- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathExpOp.java
+++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathExpOp.java
@@ -88,10 +88,10 @@
*/
// @formatter:off
@StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86_exp.cpp",
- lineStart = 0,
- lineEnd = 406,
- commit = "e58c12e61828485bfffbc9d1b865302b93a94158",
- sha1 = "e8777563cb0f0f275a490992a36bbdf06bb4c4af")
+ lineStart = 35,
+ lineEnd = 391,
+ commit = "f3be6731d3fa4fb1b7fc42c5bcbe6a64a50eaf42",
+ sha1 = "b0e25b2f08183418668966dee8f3c4cd2318aaef")
// @formatter:on
public final class AMD64MathExpOp extends AMD64MathIntrinsicUnaryOp {
diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathLog10Op.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathLog10Op.java
index 51061a6866ed..59df96ee8aa4 100644
--- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathLog10Op.java
+++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathLog10Op.java
@@ -81,10 +81,10 @@
*/
// @formatter:off
@StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86_log10.cpp",
- lineStart = 0,
- lineEnd = 382,
- commit = "e58c12e61828485bfffbc9d1b865302b93a94158",
- sha1 = "e03b4280eebe9392433389ab16c4aa52bb01270b")
+ lineStart = 34,
+ lineEnd = 383,
+ commit = "f3be6731d3fa4fb1b7fc42c5bcbe6a64a50eaf42",
+ sha1 = "ad12a2bd143c9b4af247bfe2cd97c5aeacbfcfd1")
// @formatter:on
public final class AMD64MathLog10Op extends AMD64MathIntrinsicUnaryOp {
diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathLogOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathLogOp.java
index b19658ce20cb..0c894d6d60b8 100644
--- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathLogOp.java
+++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathLogOp.java
@@ -83,10 +83,10 @@
*/
// @formatter:off
@StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86_log.cpp",
- lineStart = 0,
- lineEnd = 362,
- commit = "e58c12e61828485bfffbc9d1b865302b93a94158",
- sha1 = "4fc26bdb838040042ba0a4f5c04d737705ad4a7a")
+ lineStart = 34,
+ lineEnd = 363,
+ commit = "f3be6731d3fa4fb1b7fc42c5bcbe6a64a50eaf42",
+ sha1 = "2482010183721b57ae47d581c800226ec001491a")
// @formatter:on
public final class AMD64MathLogOp extends AMD64MathIntrinsicUnaryOp {
diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathPowOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathPowOp.java
index de6dfea43f42..b7a8962a9490 100644
--- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathPowOp.java
+++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathPowOp.java
@@ -112,10 +112,10 @@
*/
// @formatter:off
@StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86_pow.cpp",
- lineStart = 0,
- lineEnd = 1880,
- commit = "e58c12e61828485bfffbc9d1b865302b93a94158",
- sha1 = "ff1905731c30cf343460e72d58537d4672b0dce2")
+ lineStart = 35,
+ lineEnd = 1881,
+ commit = "f3be6731d3fa4fb1b7fc42c5bcbe6a64a50eaf42",
+ sha1 = "45d182416a75a945c13990e76ec07c604c78825a")
// @formatter:on
public final class AMD64MathPowOp extends AMD64MathIntrinsicBinaryOp {
diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathSinOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathSinOp.java
index a85df4bbe948..689d0ac58301 100644
--- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathSinOp.java
+++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathSinOp.java
@@ -205,10 +205,10 @@
*/
// @formatter:off
@StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86_sin.cpp",
- lineStart = 0,
- lineEnd = 848,
- commit = "e58c12e61828485bfffbc9d1b865302b93a94158",
- sha1 = "4ac9bd6f8b98df9a93ab8ef7de250421605b323c")
+ lineStart = 35,
+ lineEnd = 636,
+ commit = "f3be6731d3fa4fb1b7fc42c5bcbe6a64a50eaf42",
+ sha1 = "35e247db6760c377bb6694dfe98559b8b2eaf8c0")
// @formatter:on
public final class AMD64MathSinOp extends AMD64MathIntrinsicUnaryOp {
diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathTanOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathTanOp.java
index d4c65b7f1641..14098c1d21cf 100644
--- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathTanOp.java
+++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathTanOp.java
@@ -131,10 +131,10 @@
*/
// @formatter:off
@StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86_tan.cpp",
- lineStart = 0,
- lineEnd = 1059,
- commit = "e58c12e61828485bfffbc9d1b865302b93a94158",
- sha1 = "1f1f3a6d2437b250c0d5b13e596d9ed5a14c869e")
+ lineStart = 34,
+ lineEnd = 1020,
+ commit = "f3be6731d3fa4fb1b7fc42c5bcbe6a64a50eaf42",
+ sha1 = "8a617c23d7eb9c1687a19b01d15b661cd636fe2e")
// @formatter:on
public final class AMD64MathTanOp extends AMD64MathIntrinsicUnaryOp {
diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64RoundFloatToIntegerOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64RoundFloatToIntegerOp.java
index ef2a474c7f19..150c407507d9 100644
--- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64RoundFloatToIntegerOp.java
+++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64RoundFloatToIntegerOp.java
@@ -49,9 +49,9 @@
*/
// @formatter:off
@StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86.cpp",
- lineStart = 9210,
- lineEnd = 9306,
- commit = "d00e7b92b4a6d33f5db6e2aedce5e058832a23de",
+ lineStart = 9243,
+ lineEnd = 9339,
+ commit = "926380d3b748fd591f45abc99c497abc62c52565",
sha1 = "7bb09de1deee91732af6a55f527c53eb33dec489")
@StubPort(path = "src/hotspot/cpu/x86/stubGenerator_x86_64.cpp",
lineStart = 641,
diff --git a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64VectorizedMismatchOp.java b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64VectorizedMismatchOp.java
index 0e62e3ca18f1..03ab2c118296 100644
--- a/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64VectorizedMismatchOp.java
+++ b/compiler/src/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64VectorizedMismatchOp.java
@@ -63,9 +63,9 @@
*/
// @formatter:off
@StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86.cpp",
- lineStart = 6380,
- lineEnd = 6598,
- commit = "d00e7b92b4a6d33f5db6e2aedce5e058832a23de",
+ lineStart = 6413,
+ lineEnd = 6631,
+ commit = "926380d3b748fd591f45abc99c497abc62c52565",
sha1 = "128d88224b8fc7fa9283072966a28c14fdc1eda5")
// @formatter:on
@Opcode("VECTORIZED_MISMATCH")
diff --git a/compiler/src/org.graalvm.compiler.lir.processor/src/org/graalvm/compiler/lir/processor/StubPortProcessor.java b/compiler/src/org.graalvm.compiler.lir.processor/src/org/graalvm/compiler/lir/processor/StubPortProcessor.java
index 6446a388a0b1..b6d249f7fcf2 100644
--- a/compiler/src/org.graalvm.compiler.lir.processor/src/org/graalvm/compiler/lir/processor/StubPortProcessor.java
+++ b/compiler/src/org.graalvm.compiler.lir.processor/src/org/graalvm/compiler/lir/processor/StubPortProcessor.java
@@ -130,7 +130,7 @@ private static int find(Proxy proxy, String oldUrl, String newUrl, int lineStart
String oldSnippet = oldUrlIn.lines().skip(lineStart).limit(lineEnd - lineStart).collect(Collectors.joining("\n"));
int newLineStart = Math.max(0, lineStart - SEARCH_RANGE);
int newLineEnd = lineEnd + SEARCH_RANGE;
- String newFullFile = newUrlIn.lines().skip(newLineStart).limit(newLineEnd - lineStart).collect(Collectors.joining("\n"));
+ String newFullFile = newUrlIn.lines().skip(newLineStart).limit(newLineEnd - newLineStart).collect(Collectors.joining("\n"));
int idx = newFullFile.indexOf(oldSnippet);
if (idx != -1) {
return newLineStart + newFullFile.substring(0, idx).split("\n").length;
@@ -170,11 +170,6 @@ protected boolean doProcess(Set<? extends TypeElement> annotations, RoundEnviron
proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxyURI.getHost(), proxyURI.getPort()));
}
- if (proxyEnv != null) {
- URI proxyURI = new URI(System.getenv(HTTPS_PROXY_ENV_VAR));
- proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxyURI.getHost(), proxyURI.getPort()));
- }
-
for (Element element : roundEnv.getElementsAnnotatedWith(tStubPort)) {
compareDigest(md, getAnnotation(element, tStubPort.asType()), element, proxy);
}
diff --git a/compiler/src/org.graalvm.compiler.lir/src/org/graalvm/compiler/lir/gen/LIRGeneratorTool.java b/compiler/src/org.graalvm.compiler.lir/src/org/graalvm/compiler/lir/gen/LIRGeneratorTool.java
index 5a73c532fcdc..9973a5526285 100644
--- a/compiler/src/org.graalvm.compiler.lir/src/org/graalvm/compiler/lir/gen/LIRGeneratorTool.java
+++ b/compiler/src/org.graalvm.compiler.lir/src/org/graalvm/compiler/lir/gen/LIRGeneratorTool.java
@@ -326,6 +326,11 @@ default void emitAESDecrypt(Value from, Value to, Value key) {
throw GraalError.unimplemented("No specialized implementation available");
}
+ @SuppressWarnings("unused")
+ default void emitGHASHProcessBlocks(Value state, Value hashSubkey, Value data, Value blocks) {
+ throw GraalError.unimplemented("No specialized implementation available");
+ }
+
void emitBlackhole(Value operand);
LIRKind getLIRKind(Stamp stamp);
diff --git a/compiler/src/org.graalvm.compiler.replacements.aarch64/src/org/graalvm/compiler/replacements/aarch64/AArch64GraphBuilderPlugins.java b/compiler/src/org.graalvm.compiler.replacements.aarch64/src/org/graalvm/compiler/replacements/aarch64/AArch64GraphBuilderPlugins.java
index b7a9e6dc9b4f..bcd35f56d434 100644
--- a/compiler/src/org.graalvm.compiler.replacements.aarch64/src/org/graalvm/compiler/replacements/aarch64/AArch64GraphBuilderPlugins.java
+++ b/compiler/src/org.graalvm.compiler.replacements.aarch64/src/org/graalvm/compiler/replacements/aarch64/AArch64GraphBuilderPlugins.java
@@ -71,6 +71,7 @@
import org.graalvm.compiler.replacements.SnippetSubstitutionInvocationPlugin;
import org.graalvm.compiler.replacements.SnippetTemplate;
import org.graalvm.compiler.replacements.StandardGraphBuilderPlugins.AESCryptPlugin;
+import org.graalvm.compiler.replacements.StandardGraphBuilderPlugins.GHASHPlugin;
import org.graalvm.compiler.replacements.StandardGraphBuilderPlugins.StringLatin1IndexOfCharPlugin;
import org.graalvm.compiler.replacements.StringLatin1InflateNode;
import org.graalvm.compiler.replacements.StringLatin1Snippets;
@@ -115,6 +116,7 @@ public void run() {
}
registerStringCodingPlugins(invocationPlugins, replacements);
registerAESPlugins(invocationPlugins, replacements, arch);
+ registerGHASHPlugin(invocationPlugins, replacements, arch);
}
});
}
@@ -559,9 +561,22 @@ private static boolean supports(AArch64 arch, CPUFeature... features) {
return true;
}
+ public static boolean supportsAESPlugins(AArch64 arch) {
+ return supports(arch, CPUFeature.AES);
+ }
+
private static void registerAESPlugins(InvocationPlugins plugins, Replacements replacements, AArch64 arch) {
Registration r = new Registration(plugins, "com.sun.crypto.provider.AESCrypt", replacements);
- r.registerConditional(supports(arch, CPUFeature.AES), new AESCryptPlugin(ENCRYPT));
- r.registerConditional(supports(arch, CPUFeature.AES), new AESCryptPlugin(DECRYPT));
+ r.registerConditional(supportsAESPlugins(arch), new AESCryptPlugin(ENCRYPT));
+ r.registerConditional(supportsAESPlugins(arch), new AESCryptPlugin(DECRYPT));
+ }
+
+ public static boolean supportsGHASHPlugins(AArch64 arch) {
+ return supports(arch, CPUFeature.PMULL);
+ }
+
+ private static void registerGHASHPlugin(InvocationPlugins plugins, Replacements replacements, AArch64 arch) {
+ Registration r = new Registration(plugins, "com.sun.crypto.provider.GHASH", replacements);
+ r.registerConditional(supportsGHASHPlugins(arch), new GHASHPlugin());
}
}
diff --git a/compiler/src/org.graalvm.compiler.replacements.amd64/src/org/graalvm/compiler/replacements/amd64/AMD64GraphBuilderPlugins.java b/compiler/src/org.graalvm.compiler.replacements.amd64/src/org/graalvm/compiler/replacements/amd64/AMD64GraphBuilderPlugins.java
index a5b0ee7bf590..61e057a1190b 100644
--- a/compiler/src/org.graalvm.compiler.replacements.amd64/src/org/graalvm/compiler/replacements/amd64/AMD64GraphBuilderPlugins.java
+++ b/compiler/src/org.graalvm.compiler.replacements.amd64/src/org/graalvm/compiler/replacements/amd64/AMD64GraphBuilderPlugins.java
@@ -74,8 +74,9 @@
import org.graalvm.compiler.replacements.InvocationPluginHelper;
import org.graalvm.compiler.replacements.SnippetSubstitutionInvocationPlugin;
import org.graalvm.compiler.replacements.SnippetTemplate;
-import org.graalvm.compiler.replacements.StandardGraphBuilderPlugins;
import org.graalvm.compiler.replacements.StandardGraphBuilderPlugins.AESCryptPlugin;
+import org.graalvm.compiler.replacements.StandardGraphBuilderPlugins.ArrayEqualsInvocationPlugin;
+import org.graalvm.compiler.replacements.StandardGraphBuilderPlugins.GHASHPlugin;
import org.graalvm.compiler.replacements.StandardGraphBuilderPlugins.StringLatin1IndexOfCharPlugin;
import org.graalvm.compiler.replacements.StringLatin1InflateNode;
import org.graalvm.compiler.replacements.StringLatin1Snippets;
@@ -124,6 +125,7 @@ public void run() {
registerArraysEqualsPlugins(invocationPlugins, replacements);
registerStringCodingPlugins(invocationPlugins, replacements);
registerAESPlugins(invocationPlugins, replacements, arch);
+ registerGHASHPlugin(invocationPlugins, replacements, arch);
}
});
}
@@ -539,8 +541,8 @@ public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Rec
private static void registerArraysEqualsPlugins(InvocationPlugins plugins, Replacements replacements) {
Registration r = new Registration(plugins, Arrays.class, replacements);
- r.register(new StandardGraphBuilderPlugins.ArrayEqualsInvocationPlugin(JavaKind.Float, float[].class, float[].class));
- r.register(new StandardGraphBuilderPlugins.ArrayEqualsInvocationPlugin(JavaKind.Double, double[].class, double[].class));
+ r.register(new ArrayEqualsInvocationPlugin(JavaKind.Float, float[].class, float[].class));
+ r.register(new ArrayEqualsInvocationPlugin(JavaKind.Double, double[].class, double[].class));
}
private static void registerStringCodingPlugins(InvocationPlugins plugins, Replacements replacements) {
@@ -634,9 +636,22 @@ private static boolean supports(AMD64 arch, CPUFeature... features) {
return true;
}
+ public static boolean supportsAESPlugins(AMD64 arch) {
+ return supports(arch, CPUFeature.AVX, CPUFeature.AES);
+ }
+
private static void registerAESPlugins(InvocationPlugins plugins, Replacements replacements, AMD64 arch) {
Registration r = new Registration(plugins, "com.sun.crypto.provider.AESCrypt", replacements);
- r.registerConditional(supports(arch, CPUFeature.AVX, CPUFeature.AES), new AESCryptPlugin(ENCRYPT));
- r.registerConditional(supports(arch, CPUFeature.AVX, CPUFeature.AES), new AESCryptPlugin(DECRYPT));
+ r.registerConditional(supportsAESPlugins(arch), new AESCryptPlugin(ENCRYPT));
+ r.registerConditional(supportsAESPlugins(arch), new AESCryptPlugin(DECRYPT));
+ }
+
+ public static boolean supportsGHASHPlugins(AMD64 arch) {
+ return supports(arch, CPUFeature.SSSE3, CPUFeature.CLMUL);
+ }
+
+ private static void registerGHASHPlugin(InvocationPlugins plugins, Replacements replacements, AMD64 arch) {
+ Registration r = new Registration(plugins, "com.sun.crypto.provider.GHASH", replacements);
+ r.registerConditional(supportsGHASHPlugins(arch), new GHASHPlugin());
}
}
diff --git a/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/StandardGraphBuilderPlugins.java b/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/StandardGraphBuilderPlugins.java
index 42359a29eaf2..ec554e9a22b7 100644
--- a/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/StandardGraphBuilderPlugins.java
+++ b/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/StandardGraphBuilderPlugins.java
@@ -163,6 +163,7 @@
import org.graalvm.compiler.replacements.nodes.AESNode.CryptMode;
import org.graalvm.compiler.replacements.nodes.ArrayEqualsNode;
import org.graalvm.compiler.replacements.nodes.ArrayIndexOfNode;
+import org.graalvm.compiler.replacements.nodes.GHASHProcessBlocksNode;
import org.graalvm.compiler.replacements.nodes.LogNode;
import org.graalvm.compiler.replacements.nodes.MacroNode.MacroParams;
import org.graalvm.compiler.replacements.nodes.ProfileBooleanNode;
@@ -2086,4 +2087,23 @@ public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Rec
return true;
}
}
+
+ public static class GHASHPlugin extends InvocationPlugin {
+
+ public GHASHPlugin() {
+ super("processBlocks", byte[].class, int.class, int.class, long[].class, long[].class);
+ }
+
+ @Override
+ public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Receiver receiver,
+ ValueNode data, ValueNode inOffset, ValueNode blocks, ValueNode state, ValueNode hashSubkey) {
+ try (InvocationPluginHelper helper = new InvocationPluginHelper(b, targetMethod)) {
+ ValueNode dataAddress = helper.arrayElementPointer(data, JavaKind.Byte, inOffset);
+ ValueNode stateAddress = helper.arrayStart(state, JavaKind.Long);
+ ValueNode hashSubkeyAddress = helper.arrayStart(hashSubkey, JavaKind.Long);
+ b.add(new GHASHProcessBlocksNode(stateAddress, hashSubkeyAddress, dataAddress, blocks));
+ return true;
+ }
+ }
+ }
}
diff --git a/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/CryptoForeignCalls.java b/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/CryptoForeignCalls.java
index 8f2b4f56f0c3..63d1d78f45d5 100644
--- a/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/CryptoForeignCalls.java
+++ b/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/CryptoForeignCalls.java
@@ -25,24 +25,22 @@
package org.graalvm.compiler.replacements.nodes;
import org.graalvm.compiler.core.common.spi.ForeignCallDescriptor;
-import org.graalvm.compiler.nodes.NamedLocationIdentity;
import org.graalvm.word.LocationIdentity;
import org.graalvm.word.Pointer;
-import jdk.vm.ci.meta.JavaKind;
-
public class CryptoForeignCalls {
- public static final ForeignCallDescriptor STUB_AES_ENCRYPT = foreignCallDescriptor("aesEncrypt", Pointer.class, Pointer.class, Pointer.class);
- public static final ForeignCallDescriptor STUB_AES_DECRYPT = foreignCallDescriptor("aesDecrypt", Pointer.class, Pointer.class, Pointer.class);
+ public static final ForeignCallDescriptor STUB_AES_ENCRYPT = foreignCallDescriptor("aesEncrypt", AESNode.KILLED_LOCATIONS, Pointer.class, Pointer.class, Pointer.class);
+ public static final ForeignCallDescriptor STUB_AES_DECRYPT = foreignCallDescriptor("aesDecrypt", AESNode.KILLED_LOCATIONS, Pointer.class, Pointer.class, Pointer.class);
- public static final LocationIdentity[] KILLED_LOCATIONS = {NamedLocationIdentity.getArrayLocation(JavaKind.Byte)};
+ public static final ForeignCallDescriptor STUB_GHASH_PROCESS_BLOCKS = foreignCallDescriptor("ghashProcessBlocks", GHASHProcessBlocksNode.KILLED_LOCATIONS,
+ Pointer.class, Pointer.class, Pointer.class, Pointer.class);
- public static final ForeignCallDescriptor[] STUBS = {
+ public static final ForeignCallDescriptor[] AES_STUBS = {
STUB_AES_ENCRYPT,
STUB_AES_DECRYPT};
- private static ForeignCallDescriptor foreignCallDescriptor(String name, Class<?>... argTypes) {
- return new ForeignCallDescriptor(name, void.class, argTypes, false, KILLED_LOCATIONS, false, false);
+ private static ForeignCallDescriptor foreignCallDescriptor(String name, LocationIdentity[] killLocations, Class<?>... argTypes) {
+ return new ForeignCallDescriptor(name, void.class, argTypes, false, killLocations, false, false);
}
}
diff --git a/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/GHASHProcessBlocksNode.java b/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/GHASHProcessBlocksNode.java
new file mode 100644
index 000000000000..729a9f0b5b82
--- /dev/null
+++ b/compiler/src/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/GHASHProcessBlocksNode.java
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.graalvm.compiler.replacements.nodes;
+
+import java.util.EnumSet;
+
+import org.graalvm.compiler.core.common.spi.ForeignCallDescriptor;
+import org.graalvm.compiler.core.common.type.StampFactory;
+import org.graalvm.compiler.graph.NodeClass;
+import org.graalvm.compiler.lir.GenerateStub;
+import org.graalvm.compiler.nodeinfo.InputType;
+import org.graalvm.compiler.nodeinfo.NodeCycles;
+import org.graalvm.compiler.nodeinfo.NodeInfo;
+import org.graalvm.compiler.nodeinfo.NodeSize;
+import org.graalvm.compiler.nodes.NamedLocationIdentity;
+import org.graalvm.compiler.nodes.ValueNode;
+import org.graalvm.compiler.nodes.spi.NodeLIRBuilderTool;
+import org.graalvm.word.LocationIdentity;
+import org.graalvm.word.Pointer;
+
+import jdk.vm.ci.meta.JavaKind;
+
+@NodeInfo(allowedUsageTypes = {InputType.Memory}, cycles = NodeCycles.CYCLES_128, size = NodeSize.SIZE_128)
+public class GHASHProcessBlocksNode extends MemoryKillStubIntrinsicNode {
+
+ public static final NodeClass<GHASHProcessBlocksNode> TYPE = NodeClass.create(GHASHProcessBlocksNode.class);
+ public static final LocationIdentity[] KILLED_LOCATIONS = {NamedLocationIdentity.getArrayLocation(JavaKind.Long)};
+
+ @Input protected ValueNode state;
+ @Input protected ValueNode hashSubkey;
+ @Input protected ValueNode data;
+ @Input protected ValueNode blocks;
+
+ public GHASHProcessBlocksNode(ValueNode state, ValueNode hashSubkey, ValueNode data, ValueNode blocks) {
+ this(state,
+ hashSubkey,
+ data,
+ blocks,
+ null);
+ }
+
+ public GHASHProcessBlocksNode(ValueNode state, ValueNode hashSubkey, ValueNode data, ValueNode blocks, EnumSet<?> runtimeCheckedCPUFeatures) {
+ super(TYPE, StampFactory.forVoid(), runtimeCheckedCPUFeatures, LocationIdentity.any());
+ this.state = state;
+ this.hashSubkey = hashSubkey;
+ this.data = data;
+ this.blocks = blocks;
+ }
+
+ @Override
+ public ValueNode[] getForeignCallArguments() {
+ return new ValueNode[]{state, hashSubkey, data, blocks};
+ }
+
+ @Override
+ public LocationIdentity[] getKilledLocationIdentities() {
+ return KILLED_LOCATIONS;
+ }
+
+ @NodeIntrinsic
+ @GenerateStub(name = "ghashProcessBlocks")
+ public static native void apply(Pointer state,
+ Pointer hashSubkey,
+ Pointer data,
+ Pointer blocks);
+
+ @NodeIntrinsic
+ public static native void apply(Pointer state,
+ Pointer hashSubkey,
+ Pointer data,
+ Pointer blocks,
+                    @ConstantNodeParameter EnumSet<?> runtimeCheckedCPUFeatures);
+
+ @Override
+ public ForeignCallDescriptor getForeignCallDescriptor() {
+ return CryptoForeignCalls.STUB_GHASH_PROCESS_BLOCKS;
+ }
+
+ @Override
+ public void emitIntrinsic(NodeLIRBuilderTool gen) {
+ gen.getLIRGeneratorTool().emitGHASHProcessBlocks(gen.operand(state), gen.operand(hashSubkey), gen.operand(data), gen.operand(blocks));
+ }
+}
diff --git a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/cpufeature/Stubs.java b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/cpufeature/Stubs.java
index 8dec802fe2f8..c4ad53a41334 100644
--- a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/cpufeature/Stubs.java
+++ b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/cpufeature/Stubs.java
@@ -27,6 +27,7 @@
import static jdk.vm.ci.amd64.AMD64.CPUFeature.AES;
import static jdk.vm.ci.amd64.AMD64.CPUFeature.AVX;
import static jdk.vm.ci.amd64.AMD64.CPUFeature.AVX2;
+import static jdk.vm.ci.amd64.AMD64.CPUFeature.CLMUL;
import static jdk.vm.ci.amd64.AMD64.CPUFeature.POPCNT;
import static jdk.vm.ci.amd64.AMD64.CPUFeature.SSE3;
import static jdk.vm.ci.amd64.AMD64.CPUFeature.SSE4_1;
@@ -39,6 +40,7 @@
import org.graalvm.compiler.debug.GraalError;
import org.graalvm.compiler.nodes.ValueNode;
import org.graalvm.compiler.replacements.nodes.AESNode;
+import org.graalvm.compiler.replacements.nodes.GHASHProcessBlocksNode;
import org.graalvm.nativeimage.ImageSingletons;
import org.graalvm.nativeimage.Platform;
import org.graalvm.nativeimage.Platforms;
@@ -62,11 +64,15 @@ public static class AMD64Features {
AVX,
AVX2);
public static final EnumSet<CPUFeature> AES_CPU_FEATURES_AMD64 = EnumSet.of(AVX, AES);
+ public static final EnumSet<CPUFeature> GHASH_CPU_FEATURES_AMD64 = EnumSet.of(AVX, CLMUL);
public static EnumSet<CPUFeature> getRequiredCPUFeatures(Class<? extends ValueNode> klass) {
if (AESNode.class.equals(klass)) {
return AES_CPU_FEATURES_AMD64;
}
+ if (GHASHProcessBlocksNode.class.equals(klass)) {
+ return GHASH_CPU_FEATURES_AMD64;
+ }
return RUNTIME_CHECKED_CPU_FEATURES_AMD64;
}
}
@@ -75,11 +81,15 @@ public static EnumSet<CPUFeature> getRequiredCPUFeatures(Class<? extends V
public static class AArch64Features {
public static final EnumSet EMPTY_CPU_FEATURES_AARCH64 = EnumSet.noneOf(AArch64.CPUFeature.class);
public static final EnumSet AES_CPU_FEATURES_AARCH64 = EnumSet.of(AArch64.CPUFeature.AES);
+ public static final EnumSet GHASH_CPU_FEATURES_AARCH64 = EnumSet.of(AArch64.CPUFeature.PMULL);
public static EnumSet getRequiredCPUFeatures(Class extends ValueNode> klass) {
if (AESNode.class.equals(klass)) {
return AES_CPU_FEATURES_AARCH64;
}
+ if (GHASHProcessBlocksNode.class.equals(klass)) {
+ return GHASH_CPU_FEATURES_AARCH64;
+ }
return EMPTY_CPU_FEATURES_AARCH64;
}
}
diff --git a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AARCH64StubForeignCallsFeature.java b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AArch64StubForeignCallsFeature.java
similarity index 82%
rename from substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AARCH64StubForeignCallsFeature.java
rename to substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AArch64StubForeignCallsFeature.java
index 7fcd02949eb9..ee90631c3969 100644
--- a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AARCH64StubForeignCallsFeature.java
+++ b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AArch64StubForeignCallsFeature.java
@@ -26,6 +26,7 @@
import static com.oracle.svm.core.cpufeature.Stubs.AArch64Features.AES_CPU_FEATURES_AARCH64;
import static com.oracle.svm.core.cpufeature.Stubs.AArch64Features.EMPTY_CPU_FEATURES_AARCH64;
+import static com.oracle.svm.core.cpufeature.Stubs.AArch64Features.GHASH_CPU_FEATURES_AARCH64;
import org.graalvm.compiler.replacements.nodes.ArrayIndexOfForeignCalls;
import org.graalvm.compiler.replacements.nodes.CryptoForeignCalls;
@@ -36,12 +37,13 @@
@AutomaticFeature
@Platforms(AARCH64.class)
-public class AARCH64StubForeignCallsFeature extends StubForeignCallsFeatureBase {
+public class AArch64StubForeignCallsFeature extends StubForeignCallsFeatureBase {
- public AARCH64StubForeignCallsFeature() {
+ public AArch64StubForeignCallsFeature() {
super(new StubDescriptor[]{
new StubDescriptor(ArrayIndexOfForeignCalls.STUBS_AARCH64, true, EMPTY_CPU_FEATURES_AARCH64, EMPTY_CPU_FEATURES_AARCH64),
- new StubDescriptor(CryptoForeignCalls.STUBS, false, AES_CPU_FEATURES_AARCH64, AES_CPU_FEATURES_AARCH64),
+ new StubDescriptor(CryptoForeignCalls.AES_STUBS, false, AES_CPU_FEATURES_AARCH64, AES_CPU_FEATURES_AARCH64),
+ new StubDescriptor(CryptoForeignCalls.STUB_GHASH_PROCESS_BLOCKS, false, GHASH_CPU_FEATURES_AARCH64, GHASH_CPU_FEATURES_AARCH64),
});
}
}
diff --git a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AMD64StubForeignCallsFeature.java b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AMD64StubForeignCallsFeature.java
index de21310f1432..23e65eb5b7b7 100644
--- a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AMD64StubForeignCallsFeature.java
+++ b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/AMD64StubForeignCallsFeature.java
@@ -25,8 +25,11 @@
package com.oracle.svm.graal.stubs;
import static com.oracle.svm.core.cpufeature.Stubs.AMD64Features.AES_CPU_FEATURES_AMD64;
+import static com.oracle.svm.core.cpufeature.Stubs.AMD64Features.GHASH_CPU_FEATURES_AMD64;
import static com.oracle.svm.core.cpufeature.Stubs.AMD64Features.RUNTIME_CHECKED_CPU_FEATURES_AMD64;
+import static jdk.vm.ci.amd64.AMD64.CPUFeature.CLMUL;
import static jdk.vm.ci.amd64.AMD64.CPUFeature.SSE2;
+import static jdk.vm.ci.amd64.AMD64.CPUFeature.SSSE3;
import java.util.EnumSet;
@@ -64,7 +67,8 @@ public AMD64StubForeignCallsFeature() {
new StubDescriptor(ArrayRegionCompareToForeignCalls.STUBS, true, BASELINE, RUNTIME_CHECKED_CPU_FEATURES_AMD64),
new StubDescriptor(VectorizedMismatchForeignCalls.STUB, true, BASELINE, RUNTIME_CHECKED_CPU_FEATURES_AMD64),
new StubDescriptor(VectorizedMismatchForeignCalls.STUB, true, BASELINE, RUNTIME_CHECKED_CPU_FEATURES_AMD64),
- new StubDescriptor(CryptoForeignCalls.STUBS, false, AES_CPU_FEATURES_AMD64, AES_CPU_FEATURES_AMD64),
+ new StubDescriptor(CryptoForeignCalls.AES_STUBS, false, AES_CPU_FEATURES_AMD64, AES_CPU_FEATURES_AMD64),
+ new StubDescriptor(CryptoForeignCalls.STUB_GHASH_PROCESS_BLOCKS, false, EnumSet.of(SSSE3, CLMUL), GHASH_CPU_FEATURES_AMD64),
});
}
}
diff --git a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/SVMIntrinsicStubs.java b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/SVMIntrinsicStubs.java
index 1c3a6bcb526d..95c6c157f782 100644
--- a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/SVMIntrinsicStubs.java
+++ b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/stubs/SVMIntrinsicStubs.java
@@ -34,6 +34,7 @@
import org.graalvm.compiler.replacements.nodes.ArrayIndexOfNode;
import org.graalvm.compiler.replacements.nodes.ArrayRegionCompareToNode;
import org.graalvm.compiler.replacements.nodes.ArrayRegionEqualsNode;
+import org.graalvm.compiler.replacements.nodes.GHASHProcessBlocksNode;
import org.graalvm.compiler.replacements.nodes.VectorizedMismatchNode;
@GeneratedStubsHolder(targetVM = "substrate", sources = {
@@ -47,6 +48,7 @@
AMD64ArrayRegionEqualsWithMaskNode.class,
AMD64CalcStringAttributesNode.class,
AESNode.class,
+ GHASHProcessBlocksNode.class,
})
public final class SVMIntrinsicStubs {
}