Skip to content

Commit c3798de

Browse files
committed
[GR-23179] Port GHASH stub.
PullRequest: graal/12508
2 parents 86e8eba + d19017f commit c3798de

File tree

41 files changed

+2164
-176
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+2164
-176
lines changed

compiler/mx.compiler/suite.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,8 @@
567567
"dependencies" : [
568568
"JVMCI_HOTSPOT",
569569
"org.graalvm.compiler.api.runtime",
570-
"org.graalvm.compiler.replacements",
570+
"org.graalvm.compiler.replacements.amd64",
571+
"org.graalvm.compiler.replacements.aarch64",
571572
"org.graalvm.compiler.printer",
572573
"org.graalvm.compiler.runtime",
573574
],
@@ -635,7 +636,6 @@
635636
"dependencies" : [
636637
"org.graalvm.compiler.core.aarch64",
637638
"org.graalvm.compiler.hotspot",
638-
"org.graalvm.compiler.replacements.aarch64",
639639
],
640640
"requires" : [
641641
"jdk.unsupported" # sun.misc.Unsafe
@@ -653,7 +653,6 @@
653653
"sourceDirs" : ["src"],
654654
"dependencies" : [
655655
"org.graalvm.compiler.core.amd64",
656-
"org.graalvm.compiler.replacements.amd64",
657656
"org.graalvm.compiler.hotspot",
658657
],
659658
"requiresConcealed" : {

compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDAssembler.java

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,7 @@ public enum ASIMDInstruction {
562562
INSGEN(0b0011 << 11),
563563
SMOV(0b0101 << 11),
564564
UMOV(0b0111 << 11),
565+
INSELEM(0b1 << 29),
565566

566567
/* Advanced SIMD two-register miscellaneous (C4-361). */
567568
/* size xx */
@@ -590,6 +591,8 @@ public enum ASIMDInstruction {
590591
NEG(UBit | 0b01011 << 12),
591592
/* UBit 1, size 00 */
592593
NOT(UBit | 0b00101 << 12),
594+
/* UBit 1, size 01 */
595+
RBIT(UBit | 0b00101 << 12),
593596
/* UBit 1, size 1x */
594597
FCMGE_ZERO(UBit | 0b01100 << 12),
595598
FCMLE_ZERO(UBit | 0b01101 << 12),
@@ -606,6 +609,7 @@ public enum ASIMDInstruction {
606609
/* Advanced SIMD three different (C4-365). */
607610
SMLAL(0b1000 << 12),
608611
SMLSL(0b1010 << 12),
612+
PMULL(0b1110 << 12),
609613
UMLAL(UBit | 0b1000 << 12),
610614
UMLSL(UBit | 0b1010 << 12),
611615

@@ -839,10 +843,14 @@ private void permuteEncoding(ASIMDInstruction instr, ASIMDSize size, ElementSize
839843
}
840844

841845
private void copyEncoding(ASIMDInstruction instr, boolean setQBit, ElementSize eSize, Register dst, Register src, int index) {
846+
copyEncoding(instr, 0, setQBit, eSize, dst, src, index);
847+
}
848+
849+
private void copyEncoding(ASIMDInstruction instr, int extraEncoding, boolean setQBit, ElementSize eSize, Register dst, Register src, int index) {
842850
assert index >= 0 && index < ASIMDSize.FullReg.bytes() / eSize.bytes();
843851
int baseEncoding = 0b0_0_0_01110000_00000_0_0000_1_00000_00000;
844852
int imm5Encoding = (index * 2 * eSize.bytes() | eSize.bytes()) << 16;
845-
emitInt(instr.encoding | baseEncoding | qBit(setQBit) | imm5Encoding | rd(dst) | rs1(src));
853+
emitInt(instr.encoding | extraEncoding | baseEncoding | qBit(setQBit) | imm5Encoding | rd(dst) | rs1(src));
846854
}
847855

848856
private void twoRegMiscEncoding(ASIMDInstruction instr, ASIMDSize size, int eSizeEncoding, Register dst, Register src) {
@@ -2063,6 +2071,26 @@ public void fsubVVV(ASIMDSize size, ElementSize eSize, Register dst, Register sr
20632071
threeSameEncoding(ASIMDInstruction.FSUB, size, elemSize1X(eSize), dst, src1, src2);
20642072
}
20652073

2074+
/**
2075+
* C7.2.175 Insert vector element from another vector element.<br>
2076+
*
2077+
* This instruction copies the vector element of the source register to the specified vector
2078+
* element of the destination register.
2079+
*
2080+
* @param eSize size of value to duplicate.
2081+
* @param dst SIMD register.
2082+
* @param dstIdx offset of value to store.
2083+
* @param src SIMD register.
2084+
* @param srcIdx offset of value to duplicate.
2085+
*/
2086+
public void insXX(ElementSize eSize, Register dst, int dstIdx, Register src, int srcIdx) {
2087+
assert dstIdx >= 0 && dstIdx < ASIMDSize.FullReg.bytes() / eSize.bytes();
2088+
assert srcIdx >= 0 && srcIdx < ASIMDSize.FullReg.bytes() / eSize.bytes();
2089+
2090+
int srcIdxEncoding = (srcIdx * eSize.bytes()) << 11;
2091+
copyEncoding(ASIMDInstruction.INSELEM, srcIdxEncoding, true, eSize, dst, src, dstIdx);
2092+
}
2093+
20662094
/**
20672095
* C7.2.176 Insert vector element from general-purpose register.<br>
20682096
*
@@ -2339,6 +2367,59 @@ public void orrVVV(ASIMDSize size, Register dst, Register src1, Register src2) {
23392367
threeSameEncoding(ASIMDInstruction.ORR, size, elemSize10, dst, src1, src2);
23402368
}
23412369

2370+
/**
2371+
* C7.2.215 Polynomial Multiply Long (lower half).<br>
2372+
*
2373+
* This instruction multiplies corresponding elements in the lower half of the vectors.
2374+
*
2375+
* @param srcESize source element size. Must be ElementSize.Byte or ElementSize.DoubleWord.
2376+
* @param dst SIMD register.
2377+
* @param src1 SIMD register.
2378+
* @param src2 SIMD register.
2379+
*/
2380+
public void pmullVVV(ElementSize srcESize, Register dst, Register src1, Register src2) {
2381+
assert dst.getRegisterCategory().equals(SIMD);
2382+
assert src1.getRegisterCategory().equals(SIMD);
2383+
assert src2.getRegisterCategory().equals(SIMD);
2384+
assert srcESize == ElementSize.Byte || srcESize == ElementSize.DoubleWord;
2385+
2386+
threeDifferentEncoding(ASIMDInstruction.PMULL, false, elemSizeXX(srcESize), dst, src1, src2);
2387+
}
2388+
2389+
/**
2390+
* C7.2.215 Polynomial Multiply Long (upper half).<br>
2391+
*
2392+
* This instruction multiplies corresponding elements in the upper half of the vectors.
2393+
*
2394+
* @param srcESize source element size. Must be ElementSize.Byte or ElementSize.DoubleWord.
2395+
* @param dst SIMD register.
2396+
* @param src1 SIMD register.
2397+
* @param src2 SIMD register.
2398+
*/
2399+
public void pmull2VVV(ElementSize srcESize, Register dst, Register src1, Register src2) {
2400+
assert dst.getRegisterCategory().equals(SIMD);
2401+
assert src1.getRegisterCategory().equals(SIMD);
2402+
assert src2.getRegisterCategory().equals(SIMD);
2403+
assert srcESize == ElementSize.Byte || srcESize == ElementSize.DoubleWord;
2404+
2405+
threeDifferentEncoding(ASIMDInstruction.PMULL, true, elemSizeXX(srcESize), dst, src1, src2);
2406+
}
2407+
2408+
/**
2409+
* C7.2.218 Reverse Bit order.<br>
2410+
* This instruction reverses the bits in each byte.
2411+
*
2412+
* @param size register size.
2413+
* @param dst SIMD register.
2414+
* @param src SIMD register.
2415+
*/
2416+
public void rbitVV(ASIMDSize size, Register dst, Register src) {
2417+
assert dst.getRegisterCategory().equals(SIMD);
2418+
assert src.getRegisterCategory().equals(SIMD);
2419+
2420+
twoRegMiscEncoding(ASIMDInstruction.RBIT, size, elemSize01, dst, src);
2421+
}
2422+
23422423
/**
23432424
* C7.2.219 Reverse elements in 16-bit halfwords.<br>
23442425
* This instruction reverses the order of 8-bit elements in each halfword.

compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDMacroAssembler.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,21 @@ public void revVV(ASIMDSize size, ElementSize eSize, Register dst, Register src)
275275
}
276276
}
277277

278+
/**
279+
* C7.2.200 Move vector element to another vector element.<br>
280+
* <p>
281+
* Preferred alias for insert vector element from another vector element.
282+
*
283+
* @param eSize size of value to duplicate.
284+
* @param dst SIMD register.
285+
* @param dstIdx offset of value to store.
286+
* @param src SIMD register.
287+
* @param srcIdx offset of value to duplicate.
288+
*/
289+
public void movXX(ElementSize eSize, Register dst, int dstIdx, Register src, int srcIdx) {
290+
insXX(eSize, dst, dstIdx, src, srcIdx);
291+
}
292+
278293
/**
279294
* C7.2.207 Bitwise not.<br>
280295
* <p>

compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64Assembler.java

Lines changed: 105 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@
8484
import java.util.EnumSet;
8585

8686
import org.graalvm.compiler.asm.Label;
87-
import org.graalvm.compiler.core.common.Stride;
8887
import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
88+
import org.graalvm.compiler.core.common.Stride;
8989
import org.graalvm.compiler.core.common.calc.Condition;
9090
import org.graalvm.compiler.debug.GraalError;
9191
import org.graalvm.compiler.options.Option;
@@ -1073,6 +1073,7 @@ private enum VEXOpAssertion {
10731073
MASK_NULL_XMM_AVX512BW_VL(CPUFeature.AVX512VL, CPUFeature.AVX512VL, null, EVEXFeatureAssertion.AVX512F_BW_VL, MASK, null, XMM, null),
10741074
MASK_NULL_XMM_AVX512DQ_VL(CPUFeature.AVX512VL, CPUFeature.AVX512VL, null, EVEXFeatureAssertion.AVX512F_DQ_VL, MASK, null, XMM, null),
10751075
MASK_XMM_XMM_AVX512F_VL(CPUFeature.AVX512VL, CPUFeature.AVX512VL, null, EVEXFeatureAssertion.AVX512F_VL, MASK, XMM, XMM, null),
1076+
AVX1_128ONLY_CLMUL(CPUFeature.AVX, null, CPUFeature.CLMUL, null, XMM, XMM, XMM, XMM),
10761077
AVX1_128ONLY_AES(CPUFeature.AVX, null, CPUFeature.AES, null, XMM, XMM, XMM, XMM);
10771078

10781079
private final CPUFeature l128feature;
@@ -1989,6 +1990,29 @@ public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, i
19891990
}
19901991
}
19911992

1993+
public static final class VexShiftImmOp extends VexOp implements VexRRIOp {
1994+
// @formatter:off
1995+
public static final VexShiftImmOp VPSLLDQ = new VexShiftImmOp("VPSLLDQ", P_66, M_0F, WIG, 0x73, 7, VEXOpAssertion.AVX1_AVX2_AVX512BW_VL, EVEXTuple.FVM, WIG);
1996+
public static final VexShiftImmOp VPSRLDQ = new VexShiftImmOp("VPSRLDQ", P_66, M_0F, WIG, 0x73, 3, VEXOpAssertion.AVX1_AVX2_AVX512BW_VL, EVEXTuple.FVM, WIG);
1997+
// @formatter:on
1998+
1999+
private final int r;
2000+
2001+
private VexShiftImmOp(String opcode, int pp, int mmmmm, int w, int op, int r, VEXOpAssertion assertion, EVEXTuple evexTuple, int wEvex) {
2002+
super(opcode, pp, mmmmm, w, op, assertion, evexTuple, wEvex);
2003+
this.r = r;
2004+
}
2005+
2006+
@Override
2007+
public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
2008+
GraalError.guarantee(assertion.check(asm.getFeatures(), size, null, dst, src), "emitting invalid instruction");
2009+
asm.vexPrefix(null, dst, src, size, pp, mmmmm, w, wEvex, false, assertion.l128feature, assertion.l256feature);
2010+
asm.emitByte(op);
2011+
asm.emitModRM(r, src);
2012+
asm.emitByte(imm8);
2013+
}
2014+
}
2015+
19922016
/**
19932017
* Masked (i.e., conditional) SIMD loads and stores.
19942018
*/
@@ -2137,6 +2161,8 @@ public static final class VexRVMIOp extends VexOp {
21372161

21382162
// AVX2 128-bit permutation
21392163
public static final VexRVMIOp VPERM2I128 = new VexRVMIOp("VPERM2I128", P_66, M_0F3A, W0, 0x46, VEXOpAssertion.AVX2_256ONLY);
2164+
// Carry-Less Multiplication Quadword
2165+
public static final VexRVMIOp VPCLMULQDQ = new VexRVMIOp("VPCLMULQDQ", P_66, M_0F3A, WIG, 0x44, VEXOpAssertion.AVX1_128ONLY_CLMUL);
21402166
// Packed Align Right
21412167
public static final VexRVMIOp VPALIGNR = new VexRVMIOp("VPALIGNR", P_66, M_0F3A, WIG, 0x0F, VEXOpAssertion.AVX1_AVX2_AVX512BW_VL, EVEXTuple.FVM, WIG);
21422168

@@ -3839,6 +3865,16 @@ public final void pslld(Register dst, int imm8) {
38393865
emitByte(imm8 & 0xFF);
38403866
}
38413867

3868+
public final void pslldq(Register dst, int imm8) {
3869+
assert isUByte(imm8) : "invalid value";
3870+
assert inRC(XMM, dst);
3871+
// XMM7 is for /7 encoding: 66 0F 73 /7 ib
3872+
simdPrefix(AMD64.xmm7, dst, dst, PD, P_0F, false);
3873+
emitByte(0x73);
3874+
emitModRM(7, dst);
3875+
emitByte(imm8 & 0xFF);
3876+
}
3877+
38423878
public final void psllq(Register dst, Register shift) {
38433879
assert inRC(XMM, dst) && inRC(XMM, shift);
38443880
simdPrefix(dst, dst, shift, PD, P_0F, false);
@@ -3969,6 +4005,39 @@ public final void punpcklbw(Register dst, Register src) {
39694005
emitModRM(dst, src);
39704006
}
39714007

4008+
public final void pclmulqdq(Register dst, Register src, int imm8) {
4009+
assert supports(CPUFeature.CLMUL);
4010+
assert inRC(XMM, dst) && inRC(XMM, src);
4011+
simdPrefix(dst, dst, src, PD, P_0F3A, false);
4012+
emitByte(0x44);
4013+
emitModRM(dst, src);
4014+
emitByte(imm8);
4015+
}
4016+
4017+
public final void vpshufb(Register dst, Register src1, Register src2, AVXSize size) {
4018+
VexRVMOp.VPSHUFB.emit(this, size, dst, src1, src2);
4019+
}
4020+
4021+
public final void vpclmulqdq(Register dst, Register nds, Register src, int imm8) {
4022+
VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, imm8);
4023+
}
4024+
4025+
public final void vpclmullqlqdq(Register dst, Register nds, Register src) {
4026+
VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, 0x00);
4027+
}
4028+
4029+
public final void vpclmulhqlqdq(Register dst, Register nds, Register src) {
4030+
VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, 0x01);
4031+
}
4032+
4033+
public final void vpclmullqhqdq(Register dst, Register nds, Register src) {
4034+
VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, 0x10);
4035+
}
4036+
4037+
public final void vpclmulhqhqdq(Register dst, Register nds, Register src) {
4038+
VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, 0x11);
4039+
}
4040+
39724041
public final void rcpps(Register dst, Register src) {
39734042
assert inRC(XMM, dst) && inRC(XMM, src);
39744043
simdPrefix(dst, Register.None, src, PS, P_0F, false);
@@ -4106,6 +4175,10 @@ public final void unpcklpd(Register dst, Register src) {
41064175
emitModRM(dst, src);
41074176
}
41084177

4178+
public final void xorb(Register dst, AMD64Address src) {
4179+
XOR.byteRmOp.emit(this, BYTE, dst, src);
4180+
}
4181+
41094182
public final void xorl(Register dst, Register src) {
41104183
XOR.rmOp.emit(this, DWORD, dst, src);
41114184
}
@@ -4666,6 +4739,17 @@ public final void call() {
46664739
emitInt(0);
46674740
}
46684741

4742+
public final void call(Label l) {
4743+
if (l.isBound()) {
4744+
emitByte(0xE8);
4745+
emitInt(l.position());
4746+
} else {
4747+
l.addPatchAt(position(), this);
4748+
emitByte(0xE8);
4749+
emitInt(0);
4750+
}
4751+
}
4752+
46694753
public final void call(Register src) {
46704754
prefix(src);
46714755
emitByte(0xFF);
@@ -4894,8 +4978,8 @@ public void clflushopt(AMD64Address adr) {
48944978
emitOperandHelper(7, adr, 0);
48954979
}
48964980

4897-
public final void vpand(Register dst, Register nds, Register src) {
4898-
VexRVMOp.VPAND.emit(this, AVXSize.YMM, dst, nds, src);
4981+
public final void vpand(Register dst, Register nds, Register src, AVXSize size) {
4982+
VexRVMOp.VPAND.emit(this, size, dst, nds, src);
48994983
}
49004984

49014985
public final void vpandn(Register dst, Register nds, Register src) {
@@ -4906,16 +4990,16 @@ public final void vpor(Register dst, Register nds, Register src) {
49064990
VexRVMOp.VPOR.emit(this, AVXSize.YMM, dst, nds, src);
49074991
}
49084992

4909-
public final void vptest(Register dst, Register src) {
4910-
VexRMOp.VPTEST.emit(this, AVXSize.YMM, dst, src);
4993+
public final void vptest(Register dst, Register src, AVXSize size) {
4994+
VexRMOp.VPTEST.emit(this, size, dst, src);
49114995
}
49124996

4913-
public final void vpxor(Register dst, Register nds, Register src) {
4914-
VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
4997+
public final void vpxor(Register dst, Register nds, Register src, AVXSize size) {
4998+
VexRVMOp.VPXOR.emit(this, size, dst, nds, src);
49154999
}
49165000

4917-
public final void vpxor(Register dst, Register nds, AMD64Address src) {
4918-
VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
5001+
public final void vpxor(Register dst, Register nds, AMD64Address src, AVXSize size) {
5002+
VexRVMOp.VPXOR.emit(this, size, dst, nds, src);
49195003
}
49205004

49215005
public final void vpsllw(Register dst, Register src, int imm8) {
@@ -4926,12 +5010,20 @@ public final void vpsrlw(Register dst, Register src, int imm8) {
49265010
VexShiftOp.VPSRLW.emit(this, AVXSize.YMM, dst, src, imm8);
49275011
}
49285012

4929-
public final void vpslld(Register dst, Register src, int imm8) {
4930-
VexShiftOp.VPSLLD.emit(this, AVXSize.YMM, dst, src, imm8);
5013+
public final void vpslld(Register dst, Register src, int imm8, AVXSize size) {
5014+
VexShiftOp.VPSLLD.emit(this, size, dst, src, imm8);
5015+
}
5016+
5017+
public final void vpslldq(Register dst, Register src, int imm8, AVXSize size) {
5018+
VexShiftImmOp.VPSLLDQ.emit(this, size, dst, src, imm8);
5019+
}
5020+
5021+
public final void vpsrld(Register dst, Register src, int imm8, AVXSize size) {
5022+
VexShiftOp.VPSRLD.emit(this, size, dst, src, imm8);
49315023
}
49325024

4933-
public final void vpsrld(Register dst, Register src, int imm8) {
4934-
VexShiftOp.VPSRLD.emit(this, AVXSize.YMM, dst, src, imm8);
5025+
public final void vpsrldq(Register dst, Register src, int imm8, AVXSize size) {
5026+
VexShiftImmOp.VPSRLDQ.emit(this, size, dst, src, imm8);
49355027
}
49365028

49375029
public final void vpcmpeqb(Register dst, Register src1, Register src2) {

compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64MacroAssembler.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,10 @@ public final void testlAndJcc(Register src, int imm32, ConditionFlag cc, Label b
600600
applyMIOpAndJcc(AMD64MIOp.TEST, DWORD, src, imm32, cc, branchTarget, isShortJmp, false, null);
601601
}
602602

603+
public final void testqAndJcc(Register src, int imm32, ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
604+
applyMIOpAndJcc(AMD64MIOp.TEST, QWORD, src, imm32, cc, branchTarget, isShortJmp, false, null);
605+
}
606+
603607
public final void testAndJcc(OperandSize size, AMD64Address src, int imm32, ConditionFlag cc, Label branchTarget, boolean isShortJmp, IntConsumer applyBeforeFusedPair) {
604608
applyMIOpAndJcc(AMD64MIOp.TEST, size, src, imm32, cc, branchTarget, isShortJmp, false, applyBeforeFusedPair);
605609
}

0 commit comments

Comments (0)