Skip to content

Commit a96499b

Browse files
committed
[GR-42961] [GR-23798] [GR-42783] TruffleStrings: add ByteIndexOfCodePointSetNode.
PullRequest: graal/13582
2 parents f1f6f74 + 29fa37d commit a96499b

File tree

102 files changed

+5125
-2062
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

102 files changed

+5125
-2062
lines changed

compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64Assembler.java

Lines changed: 52 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -771,6 +771,16 @@ public static class SSEOp extends AMD64RMOp {
771771
public static final SSEOp MIN = new SSEOp("MIN", P_0F, 0x5D, PreferredNDS.DST);
772772
public static final SSEOp DIV = new SSEOp("DIV", P_0F, 0x5E, PreferredNDS.DST);
773773
public static final SSEOp MAX = new SSEOp("MAX", P_0F, 0x5F, PreferredNDS.DST);
774+
public static final SSEOp PSUBUSB = new SSEOp("PSUBUSB", P_0F, 0xD8, PreferredNDS.DST, OpAssertion.PackedDoubleAssertion);
775+
public static final SSEOp PSUBUSW = new SSEOp("PSUBUSW", P_0F, 0xD9, PreferredNDS.DST, OpAssertion.PackedDoubleAssertion);
776+
public static final SSEOp PMINUB = new SSEOp("PMINUB", P_0F, 0xDA, PreferredNDS.DST, OpAssertion.PackedDoubleAssertion);
777+
public static final SSEOp PMINUW = new SSEOp("PMINUW", P_0F38, 0x3A, PreferredNDS.DST, OpAssertion.PackedDoubleAssertion, CPUFeature.SSE4_1);
778+
public static final SSEOp PMINUD = new SSEOp("PMINUD", P_0F38, 0x3B, PreferredNDS.DST, OpAssertion.PackedDoubleAssertion, CPUFeature.SSE4_1);
779+
780+
public static final SSEOp PACKUSWB = new SSEOp("PACKUSWB", P_0F, 0x67, PreferredNDS.DST, OpAssertion.PackedDoubleAssertion);
781+
public static final SSEOp PACKUSDW = new SSEOp("PACKUSDW", P_0F38, 0x2B, PreferredNDS.DST, OpAssertion.PackedDoubleAssertion, CPUFeature.SSE4_1);
782+
783+
public static final SSEOp PSHUFB = new SSEOp("PSHUFB", P_0F38, 0x00, PreferredNDS.DST, OpAssertion.PackedDoubleAssertion, CPUFeature.SSSE3);
774784

775785
// MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with different operand size prefix
776786
public static final SSEOp MOVD = new SSEOp("MOVD", 0x66, P_0F, 0x6E, PreferredNDS.NONE, OpAssertion.DwordToFloatAssertion);
@@ -785,12 +795,24 @@ protected SSEOp(String opcode, int prefix, int op, PreferredNDS preferredNDS) {
785795
this(opcode, 0, prefix, op, preferredNDS, OpAssertion.FloatAssertion);
786796
}
787797

798+
/**
 * Convenience constructor for an op that requires a specific CPU feature. Delegates to the most
 * general constructor with a mandatory prefix of {@code 0} and
 * {@code OpAssertion.FloatAssertion}.
 */
protected SSEOp(String opcode, int prefix, int op, PreferredNDS preferredNDS, CPUFeature feature) {
799+
this(opcode, 0, prefix, op, preferredNDS, OpAssertion.FloatAssertion, feature);
800+
}
801+
788802
protected SSEOp(String opcode, int prefix, int op, PreferredNDS preferredNDS, OpAssertion assertion) {
789803
this(opcode, 0, prefix, op, preferredNDS, assertion);
790804
}
791805

806+
/**
 * Convenience constructor taking both an explicit operand assertion and a required CPU feature.
 * Delegates to the most general constructor with a mandatory prefix of {@code 0}.
 */
protected SSEOp(String opcode, int prefix, int op, PreferredNDS preferredNDS, OpAssertion assertion, CPUFeature feature) {
807+
this(opcode, 0, prefix, op, preferredNDS, assertion, feature);
808+
}
809+
792810
protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, PreferredNDS preferredNDS, OpAssertion assertion) {
793-
super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
811+
this(opcode, mandatoryPrefix, prefix, op, preferredNDS, assertion, CPUFeature.SSE2);
812+
}
813+
814+
/**
 * Most general constructor; the other {@code SSEOp} constructors visible here end up delegating
 * to it. Every argument except {@code preferredNDS} is forwarded to the superclass constructor;
 * {@code preferredNDS} is stored on this instance.
 */
protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, PreferredNDS preferredNDS, OpAssertion assertion, CPUFeature feature) {
815+
super(opcode, mandatoryPrefix, prefix, op, assertion, feature);
794816
this.preferredNDS = preferredNDS;
795817
}
796818

@@ -1651,6 +1673,7 @@ public static class VexRVMOp extends VexOp {
16511673
public static final VexRVMOp VPMULLD = new VexRVMOp("VPMULLD", P_66, M_0F38, WIG, 0x40, VEXOpAssertion.AVX1_AVX2_AVX512F_VL, EVEXTuple.FVM, W0);
16521674
public static final VexRVMOp VPMULLQ = new VexRVMOp("VPMULLQ", P_66, M_0F38, W1, 0x40, VEXOpAssertion.AVX512DQ_VL, EVEXTuple.FVM, W1);
16531675
public static final VexRVMOp VPSUBUSB = new VexRVMOp("VPSUBUSB", P_66, M_0F, WIG, 0xD8, VEXOpAssertion.AVX1_AVX2_AVX512BW_VL, EVEXTuple.FVM, WIG);
1676+
public static final VexRVMOp VPSUBUSW = new VexRVMOp("VPSUBUSW", P_66, M_0F, WIG, 0xD9, VEXOpAssertion.AVX1_AVX2_AVX512BW_VL, EVEXTuple.FVM, WIG);
16541677
public static final VexRVMOp VPSUBB = new VexRVMOp("VPSUBB", P_66, M_0F, WIG, 0xF8, VEXOpAssertion.AVX1_AVX2_AVX512BW_VL, EVEXTuple.FVM, WIG);
16551678
public static final VexRVMOp VPSUBW = new VexRVMOp("VPSUBW", P_66, M_0F, WIG, 0xF9, VEXOpAssertion.AVX1_AVX2_AVX512BW_VL, EVEXTuple.FVM, WIG);
16561679
public static final VexRVMOp VPSUBD = new VexRVMOp("VPSUBD", P_66, M_0F, WIG, 0xFA, VEXOpAssertion.AVX1_AVX2_AVX512F_VL, EVEXTuple.FVM, W0);
@@ -3320,24 +3343,12 @@ public final void orl(Register dst, int imm32) {
33203343
OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
33213344
}
33223345

3323-
// Insn: VPACKUSWB xmm1, xmm2, xmm3/m128
3324-
// -----
3325-
// Insn: VPACKUSWB xmm1, xmm1, xmm2
3326-
33273346
public final void packuswb(Register dst, Register src) {
3328-
assert inRC(XMM, dst) && inRC(XMM, src);
3329-
// Code: VEX.NDS.128.66.0F.WIG 67 /r
3330-
simdPrefix(dst, dst, src, PD, P_0F, false);
3331-
emitByte(0x67);
3332-
emitModRM(dst, src);
3347+
SSEOp.PACKUSWB.emit(this, PD, dst, src);
33333348
}
33343349

33353350
public final void packusdw(Register dst, Register src) {
3336-
assert inRC(XMM, dst) && inRC(XMM, src);
3337-
// Code: VEX.128.66.0F38 2B /r
3338-
simdPrefix(dst, dst, src, PD, P_0F38, false);
3339-
emitByte(0x2B);
3340-
emitModRM(dst, src);
3351+
SSEOp.PACKUSDW.emit(this, PD, dst, src);
33413352
}
33423353

33433354
public final void pop(Register dst) {
@@ -3350,15 +3361,15 @@ public void popfq() {
33503361
}
33513362

33523363
public final void ptest(Register dst, Register src) {
3353-
assert supports(CPUFeature.SSE4_1);
3364+
GraalError.guarantee(supports(CPUFeature.SSE4_1), "PTEST requires SSE4.1");
33543365
assert inRC(XMM, dst) && inRC(XMM, src);
33553366
simdPrefix(dst, Register.None, src, PD, P_0F38, false);
33563367
emitByte(0x17);
33573368
emitModRM(dst, src);
33583369
}
33593370

33603371
public final void ptest(Register dst, AMD64Address src) {
3361-
assert supports(CPUFeature.SSE4_1);
3372+
GraalError.guarantee(supports(CPUFeature.SSE4_1), "PTEST requires SSE4.1");
33623373
assert inRC(XMM, dst);
33633374
simdPrefix(dst, Register.None, src, PD, P_0F38, false);
33643375
emitByte(0x17);
@@ -3413,6 +3424,18 @@ public final void pcmpeqd(Register dst, AMD64Address src) {
34133424
emitOperandHelper(dst, src, 0);
34143425
}
34153426

3427+
/**
 * Emits {@code SSEOp.PMINUB} (x86 PMINUB: packed unsigned byte minimum) with operand size
 * {@code PD} on {@code dst} and {@code src}.
 */
public final void pminub(Register dst, Register src) {
3428+
SSEOp.PMINUB.emit(this, PD, dst, src);
3429+
}
3430+
3431+
/**
 * Emits {@code SSEOp.PMINUW} (x86 PMINUW: packed unsigned word minimum) with operand size
 * {@code PD} on {@code dst} and {@code src}. The op constant is declared with
 * {@code CPUFeature.SSE4_1}.
 */
public final void pminuw(Register dst, Register src) {
3432+
SSEOp.PMINUW.emit(this, PD, dst, src);
3433+
}
3434+
3435+
/**
 * Emits {@code SSEOp.PMINUD} (x86 PMINUD: packed unsigned dword minimum) with operand size
 * {@code PD} on {@code dst} and {@code src}. The op constant is declared with
 * {@code CPUFeature.SSE4_1}.
 */
public final void pminud(Register dst, Register src) {
3436+
SSEOp.PMINUD.emit(this, PD, dst, src);
3437+
}
3438+
34163439
public final void pcmpgtb(Register dst, Register src) {
34173440
assert supports(CPUFeature.SSE2);
34183441
assert inRC(XMM, dst) && inRC(XMM, src);
@@ -3746,19 +3769,11 @@ public final void psrldq(Register dst, int imm8) {
37463769
}
37473770

37483771
public final void pshufb(Register dst, Register src) {
3749-
GraalError.guarantee(supports(CPUFeature.SSSE3), "pshufb requires SSSE3");
3750-
assert inRC(XMM, dst) && inRC(XMM, src);
3751-
simdPrefix(dst, dst, src, PD, P_0F38, false);
3752-
emitByte(0x00);
3753-
emitModRM(dst, src);
3772+
SSEOp.PSHUFB.emit(this, PD, dst, src);
37543773
}
37553774

37563775
public final void pshufb(Register dst, AMD64Address src) {
3757-
GraalError.guarantee(supports(CPUFeature.SSSE3), "pshufb requires SSSE3");
3758-
assert inRC(XMM, dst);
3759-
simdPrefix(dst, dst, src, PD, P_0F38, false);
3760-
emitByte(0x00);
3761-
emitOperandHelper(dst, src, 0);
3776+
SSEOp.PSHUFB.emit(this, PD, dst, src);
37623777
}
37633778

37643779
public final void pshuflw(Register dst, Register src, int imm8) {
@@ -3781,17 +3796,19 @@ public final void pshufd(Register dst, Register src, int imm8) {
37813796
}
37823797

37833798
public final void psubusb(Register dst, Register src) {
3784-
assert inRC(XMM, dst) && inRC(XMM, src);
3785-
simdPrefix(dst, dst, src, PD, P_0F, false);
3786-
emitByte(0xD8);
3787-
emitModRM(dst, src);
3799+
SSEOp.PSUBUSB.emit(this, PD, dst, src);
37883800
}
37893801

37903802
public final void psubusb(Register dst, AMD64Address src) {
3791-
assert inRC(XMM, dst);
3792-
simdPrefix(dst, dst, src, PD, P_0F, false);
3793-
emitByte(0xD8);
3794-
emitOperandHelper(dst, src, 0);
3803+
SSEOp.PSUBUSB.emit(this, PD, dst, src);
3804+
}
3805+
3806+
/**
 * Register/register form: emits {@code SSEOp.PSUBUSW} (x86 PSUBUSW: packed word subtract with
 * unsigned saturation) with operand size {@code PD}.
 */
public final void psubusw(Register dst, Register src) {
3807+
SSEOp.PSUBUSW.emit(this, PD, dst, src);
3808+
}
3809+
3810+
/**
 * Register/memory form: emits {@code SSEOp.PSUBUSW} (x86 PSUBUSW: packed word subtract with
 * unsigned saturation) with operand size {@code PD}, taking the source operand from
 * {@code src}.
 */
public final void psubusw(Register dst, AMD64Address src) {
3811+
SSEOp.PSUBUSW.emit(this, PD, dst, src);
37953812
}
37963813

37973814
public final void psubd(Register dst, Register src) {

compiler/src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64MacroAssembler.java

Lines changed: 79 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
3333
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
3434
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.DWORD;
35+
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.PD;
3536
import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize.QWORD;
3637
import static org.graalvm.compiler.core.common.NumUtil.isByte;
3738

@@ -680,6 +681,10 @@ public final void andqAndJcc(Register dst, int imm32, ConditionFlag cc, Label br
680681
applyMIOpAndJcc(AND.getMIOpcode(QWORD, isByte(imm32)), QWORD, dst, imm32, cc, branchTarget, isShortJmp, false, null);
681682
}
682683

684+
/**
 * Register/register overload of {@code andqAndJcc}: hands the {@code QWORD} register-memory
 * opcode of {@code AND} to {@code applyRMOpAndJcc} together with the branch condition and
 * target.
 *
 * NOTE(review): presumably this emits the AND immediately followed by the conditional jump,
 * like the sibling {@code *AndJcc} helpers - confirm against {@code applyRMOpAndJcc}.
 */
public final void andqAndJcc(Register dst, Register src, ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
685+
applyRMOpAndJcc(AND.getRMOpcode(QWORD), QWORD, dst, src, cc, branchTarget, isShortJmp);
686+
}
687+
683688
public final void addlAndJcc(Register dst, Register src, ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
684689
applyRMOpAndJcc(ADD.getRMOpcode(DWORD), DWORD, dst, src, cc, branchTarget, isShortJmp);
685690
}
@@ -893,19 +898,15 @@ public final void movdqu(AVXSize size, Register dst, Register src) {
893898
* Compares all packed bytes/words/dwords in {@code dst} to {@code src}. Matching values are set
894899
* to all ones (0xff, 0xffff, ...), non-matching values are set to zero.
895900
*/
896-
public final void pcmpeq(AVXSize vectorSize, Stride elementStride, Register dst, Register src) {
897-
pcmpeq(vectorSize, elementStride.value, dst, src);
898-
}
899-
900-
private void pcmpeq(AVXSize vectorSize, int elementSize, Register dst, Register src) {
901+
public final void pcmpeq(AVXSize vectorSize, Stride elementSize, Register dst, Register src) {
901902
switch (elementSize) {
902-
case 1:
903+
case S1:
903904
pcmpeqb(vectorSize, dst, src);
904905
break;
905-
case 2:
906+
case S2:
906907
pcmpeqw(vectorSize, dst, src);
907908
break;
908-
case 4:
909+
case S4:
909910
pcmpeqd(vectorSize, dst, src);
910911
break;
911912
default:
@@ -941,19 +942,15 @@ public final void pcmpeqb(AVXSize size, Register dst, Register src) {
941942
* Compares all packed bytes/words/dwords in {@code dst} to {@code src}. Matching values are set
942943
* to all ones (0xff, 0xffff, ...), non-matching values are set to zero.
943944
*/
944-
public final void pcmpeq(AVXSize size, Stride elementStride, Register dst, AMD64Address src) {
945-
pcmpeq(size, elementStride.value, dst, src);
946-
}
947-
948-
private void pcmpeq(AVXSize vectorSize, int elementSize, Register dst, AMD64Address src) {
945+
public final void pcmpeq(AVXSize vectorSize, Stride elementSize, Register dst, AMD64Address src) {
949946
switch (elementSize) {
950-
case 1:
947+
case S1:
951948
pcmpeqb(vectorSize, dst, src);
952949
break;
953-
case 2:
950+
case S2:
954951
pcmpeqw(vectorSize, dst, src);
955952
break;
956-
case 4:
953+
case S4:
957954
pcmpeqd(vectorSize, dst, src);
958955
break;
959956
default:
@@ -1001,6 +998,57 @@ public final void pcmpgtd(AVXSize size, Register dst, Register src) {
1001998
}
1002999
}
10031000

1001+
/**
 * Dispatches to {@link #pminub}, {@link #pminuw} or {@link #pminud} according to
 * {@code elementSize} ({@code S1}, {@code S2} or {@code S4} respectively), forwarding
 * {@code vectorSize}, {@code dst}, {@code src1} and {@code src2} unchanged.
 *
 * @throws UnsupportedOperationException if {@code elementSize} is any other stride
 */
public final void pminu(AVXSize vectorSize, Stride elementSize, Register dst, Register src1, Register src2) {
1002+
switch (elementSize) {
1003+
case S1:
1004+
pminub(vectorSize, dst, src1, src2);
1005+
break;
1006+
case S2:
1007+
pminuw(vectorSize, dst, src1, src2);
1008+
break;
1009+
case S4:
1010+
pminud(vectorSize, dst, src1, src2);
1011+
break;
1012+
default:
1013+
// No variant is wired up here for other strides.
throw new UnsupportedOperationException();
1014+
}
1015+
}
1016+
1017+
/**
 * Three-operand byte variant: routes {@code VexRVMOp.VPMINUB} / {@code SSEOp.PMINUB} through
 * {@code simdRVMOp}. The operation is flagged as commutative, so the non-AVX path may swap the
 * sources when {@code dst} aliases {@code src2}.
 */
public final void pminub(AVXSize size, Register dst, Register src1, Register src2) {
1018+
simdRVMOp(VexRVMOp.VPMINUB, SSEOp.PMINUB, size, dst, src1, src2, true);
1019+
}
1020+
1021+
/**
 * Three-operand word variant: routes {@code VexRVMOp.VPMINUW} / {@code SSEOp.PMINUW} through
 * {@code simdRVMOp}. The operation is flagged as commutative, so the non-AVX path may swap the
 * sources when {@code dst} aliases {@code src2}.
 */
public final void pminuw(AVXSize size, Register dst, Register src1, Register src2) {
1022+
simdRVMOp(VexRVMOp.VPMINUW, SSEOp.PMINUW, size, dst, src1, src2, true);
1023+
}
1024+
1025+
/**
 * Three-operand dword variant: routes {@code VexRVMOp.VPMINUD} / {@code SSEOp.PMINUD} through
 * {@code simdRVMOp}. The operation is flagged as commutative, so the non-AVX path may swap the
 * sources when {@code dst} aliases {@code src2}.
 */
public final void pminud(AVXSize size, Register dst, Register src1, Register src2) {
1026+
simdRVMOp(VexRVMOp.VPMINUD, SSEOp.PMINUD, size, dst, src1, src2, true);
1027+
}
1028+
1029+
/**
 * Emits a three-operand vector operation, choosing the encoding from {@code isAVX()}.
 *
 * When {@code isAVX()} is true, {@code avxOp} is emitted directly with all three registers and
 * {@code vectorSize}. Otherwise the operation is simulated with the two-operand {@code sseOp}
 * via {@code threeVectorOpSSE}; note that {@code vectorSize} is not passed on in that case.
 *
 * @param isCommutative whether {@code src1} and {@code src2} may be exchanged; only consulted
 *            on the non-AVX path
 */
private void simdRVMOp(VexRVMOp avxOp, SSEOp sseOp, AVXSize vectorSize, Register dst, Register src1, Register src2, boolean isCommutative) {
1030+
if (isAVX()) {
1031+
avxOp.emit(this, vectorSize, dst, src1, src2);
1032+
} else {
1033+
threeVectorOpSSE(sseOp, dst, src1, src2, isCommutative);
1034+
}
1035+
}
1036+
1037+
/**
 * Simulates a three-operand vector operation with the two-operand SSE op {@code op}.
 *
 * Three cases are distinguished by register aliasing:
 * - {@code dst} equals {@code src1}: {@code op} is emitted on {@code dst} and {@code src2}.
 * - {@code dst} equals {@code src2} (and not {@code src1}): {@code op} is emitted on
 *   {@code dst} and {@code src1}, which is only allowed when {@code isCommutative} is true.
 * - otherwise: {@code src1} is first moved into {@code dst} with {@code movdqu}, then
 *   {@code op} is emitted on {@code dst} and {@code src2}.
 *
 * A non-commutative op with {@code dst} aliasing only {@code src2} cannot be expressed this way
 * and fails with {@code GraalError.shouldNotReachHere}.
 */
private void threeVectorOpSSE(SSEOp op, Register dst, Register src1, Register src2, boolean isCommutative) {
1038+
// dst is already the first source: apply the op in place.
if (dst.equals(src1)) {
1039+
op.emit(this, PD, dst, src2);
1040+
} else if (dst.equals(src2)) {
1041+
// dst is the second source: the only option is to swap the operands.
if (isCommutative) {
1042+
op.emit(this, PD, dst, src1);
1043+
} else {
1044+
throw GraalError.shouldNotReachHere("can't simulate non-commutative 3-vector AVX op on SSE when dst == src2!");
1045+
}
1046+
} else {
1047+
// dst is distinct from both sources: move src1 into dst, then apply the op with src2.
movdqu(dst, src1);
1048+
op.emit(this, PD, dst, src2);
1049+
}
1050+
}
1051+
10041052
private static int scaleDisplacement(Stride strideDst, Stride strideSrc, int displacement) {
10051053
if (strideSrc.value < strideDst.value) {
10061054
assert (displacement & ((1 << (strideDst.log2 - strideSrc.log2)) - 1)) == 0;
@@ -1159,19 +1207,19 @@ public final void loadAndExtendSSE(ExtendMode extendMode, Register dst, Stride s
11591207
}
11601208

11611209
public final void packuswb(AVXSize size, Register dst, Register src) {
1162-
if (isAVX()) {
1163-
VexRVMOp.VPACKUSWB.emit(this, size, dst, dst, src);
1164-
} else {
1165-
packuswb(dst, src);
1166-
}
1210+
packuswb(size, dst, dst, src);
1211+
}
1212+
1213+
/**
 * Three-operand form: routes {@code VexRVMOp.VPACKUSWB} / {@code SSEOp.PACKUSWB} through
 * {@code simdRVMOp}. The operation is flagged as non-commutative, so on the non-AVX path
 * {@code dst} must not alias {@code src2} unless it also equals {@code src1}
 * ({@code threeVectorOpSSE} throws otherwise).
 */
public final void packuswb(AVXSize size, Register dst, Register src1, Register src2) {
1214+
simdRVMOp(VexRVMOp.VPACKUSWB, SSEOp.PACKUSWB, size, dst, src1, src2, false);
11671215
}
11681216

11691217
public final void packusdw(AVXSize size, Register dst, Register src) {
1170-
if (isAVX()) {
1171-
VexRVMOp.VPACKUSDW.emit(this, size, dst, dst, src);
1172-
} else {
1173-
packusdw(dst, src);
1174-
}
1218+
packusdw(size, dst, dst, src);
1219+
}
1220+
1221+
/**
 * Three-operand form: routes {@code VexRVMOp.VPACKUSDW} / {@code SSEOp.PACKUSDW} through
 * {@code simdRVMOp}. The operation is flagged as non-commutative, so on the non-AVX path
 * {@code dst} must not alias {@code src2} unless it also equals {@code src1}
 * ({@code threeVectorOpSSE} throws otherwise).
 */
public final void packusdw(AVXSize size, Register dst, Register src1, Register src2) {
1222+
simdRVMOp(VexRVMOp.VPACKUSDW, SSEOp.PACKUSDW, size, dst, src1, src2, false);
11751223
}
11761224

11771225
public final void palignr(AVXSize size, Register dst, Register src, int imm8) {
@@ -1308,13 +1356,12 @@ public final void psrld(AVXSize size, Register dst, Register src, int imm8) {
13081356
}
13091357
}
13101358

1359+
/**
 * Three-operand form: routes {@code VexRVMOp.VPSHUFB} / {@code SSEOp.PSHUFB} through
 * {@code simdRVMOp}. The operation is flagged as non-commutative, so on the non-AVX path
 * {@code dst} must not alias {@code src2} unless it also equals {@code src1}
 * ({@code threeVectorOpSSE} throws otherwise).
 */
public final void pshufb(AVXSize size, Register dst, Register src1, Register src2) {
1360+
simdRVMOp(VexRVMOp.VPSHUFB, SSEOp.PSHUFB, size, dst, src1, src2, false);
1361+
}
1362+
13111363
public final void pshufb(AVXSize size, Register dst, Register src) {
1312-
if (isAVX()) {
1313-
VexRVMOp.VPSHUFB.emit(this, size, dst, dst, src);
1314-
} else {
1315-
// SSE
1316-
pshufb(dst, src);
1317-
}
1364+
pshufb(size, dst, dst, src);
13181365
}
13191366

13201367
public final void pshufb(AVXSize size, Register dst, AMD64Address src) {

compiler/src/org.graalvm.compiler.core.aarch64/src/org/graalvm/compiler/core/aarch64/AArch64LIRGenerator.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -613,14 +613,14 @@ public Variable emitArrayEqualsWithMaskDynamicStrides(EnumSet<?> runtimeCheckedC
613613
}
614614

615615
@Override
616-
public Variable emitArrayIndexOf(Stride stride, boolean findTwoConsecutive, boolean withMask, EnumSet<?> runtimeCheckedCPUFeatures,
616+
public Variable emitArrayIndexOf(Stride stride, ArrayIndexOfVariant variant, EnumSet<?> runtimeCheckedCPUFeatures,
617617
Value arrayPointer, Value arrayOffset, Value arrayLength, Value fromIndex, Value... searchValues) {
618618
Variable result = newVariable(LIRKind.value(AArch64Kind.DWORD));
619619
AllocatableValue[] allocatableSearchValues = new AllocatableValue[searchValues.length];
620620
for (int i = 0; i < searchValues.length; i++) {
621621
allocatableSearchValues[i] = asAllocatable(searchValues[i]);
622622
}
623-
append(new AArch64ArrayIndexOfOp(stride, findTwoConsecutive, withMask, this, result, emitConvertNullToZero(arrayPointer), asAllocatable(arrayOffset), asAllocatable(arrayLength),
623+
append(new AArch64ArrayIndexOfOp(stride, variant, this, result, emitConvertNullToZero(arrayPointer), asAllocatable(arrayOffset), asAllocatable(arrayLength),
624624
asAllocatable(fromIndex), allocatableSearchValues));
625625
return result;
626626
}

compiler/src/org.graalvm.compiler.core.amd64/src/org/graalvm/compiler/core/amd64/AMD64LIRGenerator.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -862,10 +862,10 @@ protected int getAVX3Threshold() {
862862

863863
@SuppressWarnings("unchecked")
864864
@Override
865-
public Variable emitArrayIndexOf(Stride stride, boolean findTwoConsecutive, boolean withMask, EnumSet<?> runtimeCheckedCPUFeatures,
865+
public Variable emitArrayIndexOf(Stride stride, ArrayIndexOfVariant variant, EnumSet<?> runtimeCheckedCPUFeatures,
866866
Value arrayPointer, Value arrayOffset, Value arrayLength, Value fromIndex, Value... searchValues) {
867867
Variable result = newVariable(LIRKind.value(AMD64Kind.DWORD));
868-
append(AMD64ArrayIndexOfOp.movParamsAndCreate(stride, findTwoConsecutive, withMask, this, (EnumSet<CPUFeature>) runtimeCheckedCPUFeatures,
868+
append(AMD64ArrayIndexOfOp.movParamsAndCreate(stride, variant, this, (EnumSet<CPUFeature>) runtimeCheckedCPUFeatures,
869869
result, arrayPointer, arrayOffset, arrayLength, fromIndex, searchValues));
870870
return result;
871871
}

0 commit comments

Comments
 (0)