5 changes: 2 additions & 3 deletions compiler/mx.compiler/suite.py
@@ -567,7 +567,8 @@
"dependencies" : [
"JVMCI_HOTSPOT",
"org.graalvm.compiler.api.runtime",
"org.graalvm.compiler.replacements",
"org.graalvm.compiler.replacements.amd64",
"org.graalvm.compiler.replacements.aarch64",
"org.graalvm.compiler.printer",
"org.graalvm.compiler.runtime",
],
@@ -635,7 +636,6 @@
"dependencies" : [
"org.graalvm.compiler.core.aarch64",
"org.graalvm.compiler.hotspot",
"org.graalvm.compiler.replacements.aarch64",
],
"requires" : [
"jdk.unsupported" # sun.misc.Unsafe
@@ -653,7 +653,6 @@
"sourceDirs" : ["src"],
"dependencies" : [
"org.graalvm.compiler.core.amd64",
"org.graalvm.compiler.replacements.amd64",
"org.graalvm.compiler.hotspot",
],
"requiresConcealed" : {
@@ -562,6 +562,7 @@ public enum ASIMDInstruction {
INSGEN(0b0011 << 11),
SMOV(0b0101 << 11),
UMOV(0b0111 << 11),
INSELEM(0b1 << 29),

/* Advanced SIMD two-register miscellaneous (C4-361). */
/* size xx */
@@ -590,6 +591,8 @@ public enum ASIMDInstruction {
NEG(UBit | 0b01011 << 12),
/* UBit 1, size 00 */
NOT(UBit | 0b00101 << 12),
/* UBit 1, size 01 */
RBIT(UBit | 0b00101 << 12),
/* UBit 1, size 1x */
FCMGE_ZERO(UBit | 0b01100 << 12),
FCMLE_ZERO(UBit | 0b01101 << 12),
@@ -606,6 +609,7 @@
/* Advanced SIMD three different (C4-365). */
SMLAL(0b1000 << 12),
SMLSL(0b1010 << 12),
PMULL(0b1110 << 12),
UMLAL(UBit | 0b1000 << 12),
UMLSL(UBit | 0b1010 << 12),

@@ -839,10 +843,14 @@ private void permuteEncoding(ASIMDInstruction instr, ASIMDSize size, ElementSize
}

private void copyEncoding(ASIMDInstruction instr, boolean setQBit, ElementSize eSize, Register dst, Register src, int index) {
copyEncoding(instr, 0, setQBit, eSize, dst, src, index);
}

private void copyEncoding(ASIMDInstruction instr, int extraEncoding, boolean setQBit, ElementSize eSize, Register dst, Register src, int index) {
assert index >= 0 && index < ASIMDSize.FullReg.bytes() / eSize.bytes();
int baseEncoding = 0b0_0_0_01110000_00000_0_0000_1_00000_00000;
int imm5Encoding = (index * 2 * eSize.bytes() | eSize.bytes()) << 16;
emitInt(instr.encoding | baseEncoding | qBit(setQBit) | imm5Encoding | rd(dst) | rs1(src));
emitInt(instr.encoding | extraEncoding | baseEncoding | qBit(setQBit) | imm5Encoding | rd(dst) | rs1(src));
}

private void twoRegMiscEncoding(ASIMDInstruction instr, ASIMDSize size, int eSizeEncoding, Register dst, Register src) {
@@ -2063,6 +2071,26 @@ public void fsubVVV(ASIMDSize size, ElementSize eSize, Register dst, Register sr
threeSameEncoding(ASIMDInstruction.FSUB, size, elemSize1X(eSize), dst, src1, src2);
}

/**
* C7.2.175 Insert vector element from another vector element.<br>
*
* This instruction copies a vector element of the source register into the specified vector
* element of the destination register.
*
* @param eSize size of the element to copy.
* @param dst destination SIMD register.
* @param dstIdx index of the destination element.
* @param src source SIMD register.
* @param srcIdx index of the source element.
*/
public void insXX(ElementSize eSize, Register dst, int dstIdx, Register src, int srcIdx) {
assert dstIdx >= 0 && dstIdx < ASIMDSize.FullReg.bytes() / eSize.bytes();
assert srcIdx >= 0 && srcIdx < ASIMDSize.FullReg.bytes() / eSize.bytes();

int srcIdxEncoding = (srcIdx * eSize.bytes()) << 11;
copyEncoding(ASIMDInstruction.INSELEM, srcIdxEncoding, true, eSize, dst, src, dstIdx);
}

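A minimal usage sketch (hypothetical: `asm` stands for an in-scope ASIMD assembler instance, and the register choices are illustrative):

// INS v0.S[3], v1.S[0]: copy 32-bit element 0 of v1 into element 3 of v0.
asm.insXX(ElementSize.Word, AArch64.v0, 3, AArch64.v1, 0);
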
/**
* C7.2.176 Insert vector element from general-purpose register.<br>
*
@@ -2339,6 +2367,59 @@ public void orrVVV(ASIMDSize size, Register dst, Register src1, Register src2) {
threeSameEncoding(ASIMDInstruction.ORR, size, elemSize10, dst, src1, src2);
}

/**
* C7.2.215 Polynomial Multiply Long (lower half).<br>
*
* This instruction performs a polynomial (carry-less) multiplication of corresponding
* elements in the lower half of the vectors, producing double-width results.
*
* @param srcESize source element size. Must be ElementSize.Byte or ElementSize.DoubleWord.
* @param dst SIMD register.
* @param src1 SIMD register.
* @param src2 SIMD register.
*/
public void pmullVVV(ElementSize srcESize, Register dst, Register src1, Register src2) {
assert dst.getRegisterCategory().equals(SIMD);
assert src1.getRegisterCategory().equals(SIMD);
assert src2.getRegisterCategory().equals(SIMD);
assert srcESize == ElementSize.Byte || srcESize == ElementSize.DoubleWord;

threeDifferentEncoding(ASIMDInstruction.PMULL, false, elemSizeXX(srcESize), dst, src1, src2);
}

/**
* C7.2.215 Polynomial Multiply Long (upper half).<br>
*
* This instruction performs a polynomial (carry-less) multiplication of corresponding
* elements in the upper half of the vectors, producing double-width results.
*
* @param srcESize source element size. Must be ElementSize.Byte or ElementSize.DoubleWord.
* @param dst SIMD register.
* @param src1 SIMD register.
* @param src2 SIMD register.
*/
public void pmull2VVV(ElementSize srcESize, Register dst, Register src1, Register src2) {
assert dst.getRegisterCategory().equals(SIMD);
assert src1.getRegisterCategory().equals(SIMD);
assert src2.getRegisterCategory().equals(SIMD);
assert srcESize == ElementSize.Byte || srcESize == ElementSize.DoubleWord;

threeDifferentEncoding(ASIMDInstruction.PMULL, true, elemSizeXX(srcESize), dst, src1, src2);
}

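Together these form the AArch64 carry-less multiply building block behind CRC- and GHASH-style kernels. A sketch (same hypothetical `asm`; note the 64-bit source form generally requires the Arm PMULL crypto extension):

// PMULL v0.1Q, v1.1D, v2.1D: carry-less multiply of the low 64-bit lanes.
asm.pmullVVV(ElementSize.DoubleWord, AArch64.v0, AArch64.v1, AArch64.v2);
// PMULL2 v3.1Q, v1.2D, v2.2D: the same on the high 64-bit lanes.
asm.pmull2VVV(ElementSize.DoubleWord, AArch64.v3, AArch64.v1, AArch64.v2);
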
/**
* C7.2.218 Reverse Bit order.<br>
* This instruction reverses the bits in each byte.
*
* @param size register size.
* @param dst SIMD register.
* @param src SIMD register.
*/
public void rbitVV(ASIMDSize size, Register dst, Register src) {
assert dst.getRegisterCategory().equals(SIMD);
assert src.getRegisterCategory().equals(SIMD);

twoRegMiscEncoding(ASIMDInstruction.RBIT, size, elemSize01, dst, src);
}

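For example (same hypothetical `asm`):

// RBIT v0.16B, v1.16B: reverse the bit order within every byte of v1.
asm.rbitVV(ASIMDSize.FullReg, AArch64.v0, AArch64.v1);
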
/**
* C7.2.219 Reverse elements in 16-bit halfwords.<br>
* This instruction reverses the order of 8-bit elements in each halfword.
@@ -275,6 +275,21 @@ public void revVV(ASIMDSize size, ElementSize eSize, Register dst, Register src)
}
}

/**
* C7.2.200 Move vector element to another vector element.<br>
* <p>
* Preferred alias for insert vector element from another vector element.
*
* @param eSize size of the element to copy.
* @param dst destination SIMD register.
* @param dstIdx index of the destination element.
* @param src source SIMD register.
* @param srcIdx index of the source element.
*/
public void movXX(ElementSize eSize, Register dst, int dstIdx, Register src, int srcIdx) {
insXX(eSize, dst, dstIdx, src, srcIdx);
}

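Since this is a pure alias, the call below assembles to exactly the same encoding as the equivalent insXX call (hypothetical `asm`):

// MOV v0.H[2], v1.H[5], i.e. INS v0.H[2], v1.H[5].
asm.movXX(ElementSize.HalfWord, AArch64.v0, 2, AArch64.v1, 5);
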
/**
* C7.2.207 Bitwise not.<br>
* <p>
@@ -84,8 +84,8 @@
import java.util.EnumSet;

import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.core.common.Stride;
import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
import org.graalvm.compiler.core.common.Stride;
import org.graalvm.compiler.core.common.calc.Condition;
import org.graalvm.compiler.debug.GraalError;
import org.graalvm.compiler.options.Option;
@@ -1073,6 +1073,7 @@ private enum VEXOpAssertion {
MASK_NULL_XMM_AVX512BW_VL(CPUFeature.AVX512VL, CPUFeature.AVX512VL, null, EVEXFeatureAssertion.AVX512F_BW_VL, MASK, null, XMM, null),
MASK_NULL_XMM_AVX512DQ_VL(CPUFeature.AVX512VL, CPUFeature.AVX512VL, null, EVEXFeatureAssertion.AVX512F_DQ_VL, MASK, null, XMM, null),
MASK_XMM_XMM_AVX512F_VL(CPUFeature.AVX512VL, CPUFeature.AVX512VL, null, EVEXFeatureAssertion.AVX512F_VL, MASK, XMM, XMM, null),
AVX1_128ONLY_CLMUL(CPUFeature.AVX, null, CPUFeature.CLMUL, null, XMM, XMM, XMM, XMM),
AVX1_128ONLY_AES(CPUFeature.AVX, null, CPUFeature.AES, null, XMM, XMM, XMM, XMM);

private final CPUFeature l128feature;
@@ -1989,6 +1990,29 @@ public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, i
}
}

public static final class VexShiftImmOp extends VexOp implements VexRRIOp {
// @formatter:off
public static final VexShiftImmOp VPSLLDQ = new VexShiftImmOp("VPSLLDQ", P_66, M_0F, WIG, 0x73, 7, VEXOpAssertion.AVX1_AVX2_AVX512BW_VL, EVEXTuple.FVM, WIG);
public static final VexShiftImmOp VPSRLDQ = new VexShiftImmOp("VPSRLDQ", P_66, M_0F, WIG, 0x73, 3, VEXOpAssertion.AVX1_AVX2_AVX512BW_VL, EVEXTuple.FVM, WIG);
// @formatter:on

private final int r;

private VexShiftImmOp(String opcode, int pp, int mmmmm, int w, int op, int r, VEXOpAssertion assertion, EVEXTuple evexTuple, int wEvex) {
super(opcode, pp, mmmmm, w, op, assertion, evexTuple, wEvex);
this.r = r;
}

@Override
public void emit(AMD64Assembler asm, AVXSize size, Register dst, Register src, int imm8) {
GraalError.guarantee(assertion.check(asm.getFeatures(), size, null, dst, src), "emitting invalid instruction");
asm.vexPrefix(null, dst, src, size, pp, mmmmm, w, wEvex, false, assertion.l128feature, assertion.l256feature);
asm.emitByte(op);
asm.emitModRM(r, src);
asm.emitByte(imm8);
}
}

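Unlike the register-register VEX ops, these shift-by-immediate forms carry no destination register in the ModRM reg field: the `r` value (7 for VPSLLDQ, 3 for VPSRLDQ) is the opcode extension written there, and the shifted register goes in ModRM rm. A usage sketch (hypothetical `asm` of type AMD64Assembler):

// VPSRLDQ xmm0, xmm1, 8: shift the 128-bit value right by eight bytes.
VexShiftImmOp.VPSRLDQ.emit(asm, AVXSize.XMM, AMD64.xmm0, AMD64.xmm1, 8);
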
/**
* Masked (i.e., conditional) SIMD loads and stores.
*/
@@ -2137,6 +2161,8 @@ public static final class VexRVMIOp extends VexOp {

// AVX2 128-bit permutation
public static final VexRVMIOp VPERM2I128 = new VexRVMIOp("VPERM2I128", P_66, M_0F3A, W0, 0x46, VEXOpAssertion.AVX2_256ONLY);
// Carry-Less Multiplication Quadword
public static final VexRVMIOp VPCLMULQDQ = new VexRVMIOp("VPCLMULQDQ", P_66, M_0F3A, WIG, 0x44, VEXOpAssertion.AVX1_128ONLY_CLMUL);
// Packed Align Right
public static final VexRVMIOp VPALIGNR = new VexRVMIOp("VPALIGNR", P_66, M_0F3A, WIG, 0x0F, VEXOpAssertion.AVX1_AVX2_AVX512BW_VL, EVEXTuple.FVM, WIG);

@@ -3839,6 +3865,16 @@ public final void pslld(Register dst, int imm8) {
emitByte(imm8 & 0xFF);
}

public final void pslldq(Register dst, int imm8) {
assert isUByte(imm8) : "invalid value";
assert inRC(XMM, dst);
// xmm7 stands in for the /7 opcode extension of 66 0F 73 /7 ib; the ModRM reg field
// carries 7 rather than a source register.
simdPrefix(AMD64.xmm7, dst, dst, PD, P_0F, false);
emitByte(0x73);
emitModRM(7, dst);
emitByte(imm8 & 0xFF);
}

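PSLLDQ shifts the whole 128-bit register by bytes rather than bits, e.g. (hypothetical `asm`):

// PSLLDQ xmm2, 4: shift xmm2 left by four bytes (32 bits).
asm.pslldq(AMD64.xmm2, 4);
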
public final void psllq(Register dst, Register shift) {
assert inRC(XMM, dst) && inRC(XMM, shift);
simdPrefix(dst, dst, shift, PD, P_0F, false);
@@ -3969,6 +4005,39 @@ public final void punpcklbw(Register dst, Register src) {
emitModRM(dst, src);
}

public final void pclmulqdq(Register dst, Register src, int imm8) {
assert supports(CPUFeature.CLMUL);
assert inRC(XMM, dst) && inRC(XMM, src);
simdPrefix(dst, dst, src, PD, P_0F3A, false);
emitByte(0x44);
emitModRM(dst, src);
emitByte(imm8);
}

public final void vpshufb(Register dst, Register src1, Register src2, AVXSize size) {
VexRVMOp.VPSHUFB.emit(this, size, dst, src1, src2);
}

public final void vpclmulqdq(Register dst, Register nds, Register src, int imm8) {
VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, imm8);
}

public final void vpclmullqlqdq(Register dst, Register nds, Register src) {
VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, 0x00);
}

public final void vpclmulhqlqdq(Register dst, Register nds, Register src) {
VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, 0x01);
}

public final void vpclmullqhqdq(Register dst, Register nds, Register src) {
VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, 0x10);
}

public final void vpclmulhqhqdq(Register dst, Register nds, Register src) {
VexRVMIOp.VPCLMULQDQ.emit(this, AVXSize.XMM, dst, nds, src, 0x11);
}

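In the PCLMULQDQ immediate, bit 0 selects which 64-bit half of the first source participates and bit 4 selects the half of the second source; the four named helpers above simply fix the immediate to 0x00, 0x01, 0x10, or 0x11. For example (hypothetical `asm`):

// VPCLMULQDQ xmm0, xmm1, xmm2, 0x00: low half of xmm1 times low half of xmm2...
asm.vpclmulqdq(AMD64.xmm0, AMD64.xmm1, AMD64.xmm2, 0x00);
// ...which is exactly what the named form emits as well.
asm.vpclmullqlqdq(AMD64.xmm0, AMD64.xmm1, AMD64.xmm2);
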
public final void rcpps(Register dst, Register src) {
assert inRC(XMM, dst) && inRC(XMM, src);
simdPrefix(dst, Register.None, src, PS, P_0F, false);
@@ -4106,6 +4175,10 @@ public final void unpcklpd(Register dst, Register src) {
emitModRM(dst, src);
}

public final void xorb(Register dst, AMD64Address src) {
XOR.byteRmOp.emit(this, BYTE, dst, src);
}

public final void xorl(Register dst, Register src) {
XOR.rmOp.emit(this, DWORD, dst, src);
}
@@ -4666,6 +4739,17 @@ public final void call() {
emitInt(0);
}

public final void call(Label l) {
if (l.isBound()) {
emitByte(0xE8);
emitInt(l.position());
} else {
l.addPatchAt(position(), this);
emitByte(0xE8);
emitInt(0);
}
}

public final void call(Register src) {
prefix(src);
emitByte(0xFF);
@@ -4894,8 +4978,8 @@ public void clflushopt(AMD64Address adr) {
emitOperandHelper(7, adr, 0);
}

public final void vpand(Register dst, Register nds, Register src) {
VexRVMOp.VPAND.emit(this, AVXSize.YMM, dst, nds, src);
public final void vpand(Register dst, Register nds, Register src, AVXSize size) {
VexRVMOp.VPAND.emit(this, size, dst, nds, src);
}

public final void vpandn(Register dst, Register nds, Register src) {
@@ -4906,16 +4990,16 @@ public final void vpor(Register dst, Register nds, Register src) {
VexRVMOp.VPOR.emit(this, AVXSize.YMM, dst, nds, src);
}

public final void vptest(Register dst, Register src) {
VexRMOp.VPTEST.emit(this, AVXSize.YMM, dst, src);
public final void vptest(Register dst, Register src, AVXSize size) {
VexRMOp.VPTEST.emit(this, size, dst, src);
}

public final void vpxor(Register dst, Register nds, Register src) {
VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
public final void vpxor(Register dst, Register nds, Register src, AVXSize size) {
VexRVMOp.VPXOR.emit(this, size, dst, nds, src);
}

public final void vpxor(Register dst, Register nds, AMD64Address src) {
VexRVMOp.VPXOR.emit(this, AVXSize.YMM, dst, nds, src);
public final void vpxor(Register dst, Register nds, AMD64Address src, AVXSize size) {
VexRVMOp.VPXOR.emit(this, size, dst, nds, src);
}

public final void vpsllw(Register dst, Register src, int imm8) {
@@ -4926,12 +5010,20 @@ public final void vpsrlw(Register dst, Register src, int imm8) {
VexShiftOp.VPSRLW.emit(this, AVXSize.YMM, dst, src, imm8);
}

public final void vpslld(Register dst, Register src, int imm8) {
VexShiftOp.VPSLLD.emit(this, AVXSize.YMM, dst, src, imm8);
public final void vpslld(Register dst, Register src, int imm8, AVXSize size) {
VexShiftOp.VPSLLD.emit(this, size, dst, src, imm8);
}

public final void vpslldq(Register dst, Register src, int imm8, AVXSize size) {
VexShiftImmOp.VPSLLDQ.emit(this, size, dst, src, imm8);
}

public final void vpsrld(Register dst, Register src, int imm8, AVXSize size) {
VexShiftOp.VPSRLD.emit(this, size, dst, src, imm8);
}

public final void vpsrld(Register dst, Register src, int imm8) {
VexShiftOp.VPSRLD.emit(this, AVXSize.YMM, dst, src, imm8);
public final void vpsrldq(Register dst, Register src, int imm8, AVXSize size) {
VexShiftImmOp.VPSRLDQ.emit(this, size, dst, src, imm8);
}

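These helpers now take the vector width explicitly instead of hard-coding AVXSize.YMM, so 128-bit call sites can say what they mean (hypothetical `asm`):

// 128-bit forms of previously YMM-only helpers.
asm.vpxor(AMD64.xmm0, AMD64.xmm0, AMD64.xmm1, AVXSize.XMM);
asm.vptest(AMD64.xmm0, AMD64.xmm1, AVXSize.XMM);
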
public final void vpcmpeqb(Register dst, Register src1, Register src2) {
@@ -600,6 +600,10 @@ public final void testlAndJcc(Register src, int imm32, ConditionFlag cc, Label b
applyMIOpAndJcc(AMD64MIOp.TEST, DWORD, src, imm32, cc, branchTarget, isShortJmp, false, null);
}

public final void testqAndJcc(Register src, int imm32, ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
applyMIOpAndJcc(AMD64MIOp.TEST, QWORD, src, imm32, cc, branchTarget, isShortJmp, false, null);
}

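A sketch of the fused test-and-branch (hypothetical `masm` of type AMD64MacroAssembler and label `target`):

// if ((rax & 0xFF) != 0) jump to target, using the short-jump form.
masm.testqAndJcc(AMD64.rax, 0xFF, ConditionFlag.NotZero, target, true);
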
public final void testAndJcc(OperandSize size, AMD64Address src, int imm32, ConditionFlag cc, Label branchTarget, boolean isShortJmp, IntConsumer applyBeforeFusedPair) {
applyMIOpAndJcc(AMD64MIOp.TEST, size, src, imm32, cc, branchTarget, isShortJmp, false, applyBeforeFusedPair);
}