Skip to content

Commit c355902

Browse files
Pengfei Li (e1iu)
authored and committed
8282528: AArch64: Incorrect replicate2L_zero rule
Reviewed-by: aph
1 parent 5691a3b commit c355902

File tree

7 files changed

+463
-104
lines changed

7 files changed

+463
-104
lines changed

src/hotspot/cpu/aarch64/aarch64_neon.ad

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3262,16 +3262,14 @@ instruct replicate2L(vecX dst, iRegL src)
32623262
ins_pipe(vdup_reg_reg128);
32633263
%}
32643264

3265-
instruct replicate2L_zero(vecX dst, immI0 zero)
3265+
instruct replicate2L_imm(vecX dst, immL con)
32663266
%{
32673267
predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
3268-
match(Set dst (ReplicateI zero));
3268+
match(Set dst (ReplicateL con));
32693269
ins_cost(INSN_COST);
3270-
format %{ "movi $dst, $zero\t# vector (4I)" %}
3270+
format %{ "movi $dst, $con\t# vector (2L)" %}
32713271
ins_encode %{
3272-
__ eor(as_FloatRegister($dst$$reg), __ T16B,
3273-
as_FloatRegister($dst$$reg),
3274-
as_FloatRegister($dst$$reg));
3272+
__ mov(as_FloatRegister($dst$$reg), __ T2D, $con$$constant);
32753273
%}
32763274
ins_pipe(vmovi_reg_imm128);
32773275
%}
@@ -3283,8 +3281,7 @@ instruct replicate2F(vecD dst, vRegF src)
32833281
ins_cost(INSN_COST);
32843282
format %{ "dup $dst, $src\t# vector (2F)" %}
32853283
ins_encode %{
3286-
__ dup(as_FloatRegister($dst$$reg), __ T2S,
3287-
as_FloatRegister($src$$reg));
3284+
__ dup(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
32883285
%}
32893286
ins_pipe(vdup_reg_freg64);
32903287
%}
@@ -3296,8 +3293,7 @@ instruct replicate4F(vecX dst, vRegF src)
32963293
ins_cost(INSN_COST);
32973294
format %{ "dup $dst, $src\t# vector (4F)" %}
32983295
ins_encode %{
3299-
__ dup(as_FloatRegister($dst$$reg), __ T4S,
3300-
as_FloatRegister($src$$reg));
3296+
__ dup(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
33013297
%}
33023298
ins_pipe(vdup_reg_freg128);
33033299
%}
@@ -3309,8 +3305,7 @@ instruct replicate2D(vecX dst, vRegD src)
33093305
ins_cost(INSN_COST);
33103306
format %{ "dup $dst, $src\t# vector (2D)" %}
33113307
ins_encode %{
3312-
__ dup(as_FloatRegister($dst$$reg), __ T2D,
3313-
as_FloatRegister($src$$reg));
3308+
__ dup(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
33143309
%}
33153310
ins_pipe(vdup_reg_dreg128);
33163311
%}

src/hotspot/cpu/aarch64/aarch64_neon_ad.m4

Lines changed: 56 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1559,56 +1559,62 @@ VFABD(fabd, fabd, 2, F, D, S, 64)
15591559
VFABD(fabd, fabd, 4, F, X, S, 128)
15601560
VFABD(fabd, fabd, 2, D, X, D, 128)
15611561
dnl
1562-
define(`VREPLICATE', `
1563-
instruct replicate$3$4$5`'(vec$6 dst, $7 ifelse($7, immI0, zero, $7, immI, con, src))
1564-
%{
1565-
predicate(UseSVE == 0 && ifelse($8, `',
1566-
n->as_Vector()->length() == $3,
1567-
(n->as_Vector()->length() == $3 ||`
1568-
'n->as_Vector()->length() == $8)));
1569-
match(Set dst (Replicate`'ifelse($7, immI0, I, $4) ifelse($7, immI0, zero, $7, immI, con, $7, zero, I, src)));
1570-
ins_cost(INSN_COST);
1571-
format %{ "$1 $dst, $ifelse($7, immI0, zero, $7, immI, con, src)`\t# vector ('ifelse($4$7, SimmI, $3H, $2, eor, 4I, $3$4)`)"' %}
1572-
ins_encode %{
1573-
__ $2(as_FloatRegister($dst$$reg), __ ifelse(
1574-
$2, eor, T16B, T$3`'$9),ifelse(
1575-
`$4 $7', `B immI', ` '$con$$constant & 0xff,
1576-
`$4 $7', `S immI', ` '$con$$constant & 0xffff,
1577-
`$4 $7', `I immI', ` '$con$$constant,
1578-
`$2', eor,`
1579-
as_FloatRegister($dst$$reg),
1580-
as_FloatRegister($dst$$reg)',
1581-
`$7', vRegF,`
1582-
as_FloatRegister($src$$reg)',
1583-
`$7', vRegD,`
1584-
as_FloatRegister($src$$reg)',
1585-
` 'as_Register($src$$reg)));
1586-
%}
1587-
ins_pipe(ifelse($7, immI0, v$1_reg_imm,
1588-
$7, immI, v$1_reg_imm,
1589-
$7, iRegIorL2I, v$1_reg_reg,
1590-
$7, zero, vmovi_reg_imm,
1591-
$7, iRegL, vdup_reg_reg,
1592-
$4, F, vdup_reg_freg, vdup_reg_dreg)`'ifelse($6, X, 128, 64));
1593-
%}')dnl
1594-
dnl $1 $2 $3 $4 $5 $6 $7 $8 $9
1595-
VREPLICATE(dup, dup, 8, B, , D, iRegIorL2I, 4, B)
1596-
VREPLICATE(dup, dup, 16, B, , X, iRegIorL2I, , B)
1597-
VREPLICATE(movi, mov, 8, B, _imm, D, immI, 4, B)
1598-
VREPLICATE(movi, mov, 16, B, _imm, X, immI, , B)
1599-
VREPLICATE(dup, dup, 4, S, , D, iRegIorL2I, 2, H)
1600-
VREPLICATE(dup, dup, 8, S, , X, iRegIorL2I, , H)
1601-
VREPLICATE(movi, mov, 4, S, _imm, D, immI, 2, H)
1602-
VREPLICATE(movi, mov, 8, S, _imm, X, immI, , H)
1603-
VREPLICATE(dup, dup, 2, I, , D, iRegIorL2I, , S)
1604-
VREPLICATE(dup, dup, 4, I, , X, iRegIorL2I, , S)
1605-
VREPLICATE(movi, mov, 2, I, _imm, D, immI, , S)
1606-
VREPLICATE(movi, mov, 4, I, _imm, X, immI, , S)
1607-
VREPLICATE(dup, dup, 2, L, , X, iRegL, , D)
1608-
VREPLICATE(movi, eor, 2, L, _zero, X, immI0, , D)
1609-
VREPLICATE(dup, dup, 2, F, , D, vRegF, , S)
1610-
VREPLICATE(dup, dup, 4, F, , X, vRegF, , S)
1611-
VREPLICATE(dup, dup, 2, D, , X, vRegD, , D)
1562+
define(`VREPLICATE_REG', `
1563+
instruct replicate$2$3`'(vec$4 dst, $5 src)
1564+
%{
1565+
predicate(UseSVE == 0 && ifelse($2$3, 8B,
1566+
`(n->as_Vector()->length() == 8 ||
1567+
n->as_Vector()->length() == 4)',
1568+
$2$3, 4S,
1569+
`(n->as_Vector()->length() == 4 ||
1570+
n->as_Vector()->length() == 2)',
1571+
n->as_Vector()->length() == $2));
1572+
match(Set dst (Replicate$3 src));
1573+
ins_cost(INSN_COST);
1574+
format %{ "dup $dst, $src\t# vector ($2$3)" %}
1575+
ins_encode %{
1576+
__ dup(as_FloatRegister($dst$$reg), __ T$2$1, $6($src$$reg));
1577+
%}
1578+
ins_pipe(ifelse($5, iRegIorL2I, vdup_reg_reg,
1579+
$5, iRegL, vdup_reg_reg,
1580+
$3, F, vdup_reg_freg, vdup_reg_dreg)`'ifelse($4, X, 128, 64));
1581+
%}')dnl
1582+
define(`VREPLICATE_IMM', `
1583+
instruct replicate$2$3_imm`'(vec$4 dst, $5 con)
1584+
%{
1585+
predicate(UseSVE == 0 && ifelse($2$3, 8B,
1586+
`(n->as_Vector()->length() == 8 ||
1587+
n->as_Vector()->length() == 4)',
1588+
$2$3, 4S,
1589+
`(n->as_Vector()->length() == 4 ||
1590+
n->as_Vector()->length() == 2)',
1591+
n->as_Vector()->length() == $2));
1592+
match(Set dst (Replicate$3 con));
1593+
ins_cost(INSN_COST);
1594+
format %{ "movi $dst, $con\t`#' vector ($2`'ifelse($3, S, H, $3))" %}
1595+
ins_encode %{
1596+
__ mov(as_FloatRegister($dst$$reg), __ T$2`'iTYPE2SIMD($3), $con$$constant`'$6);
1597+
%}
1598+
ins_pipe(vmovi_reg_imm`'ifelse($4, X, 128, 64));
1599+
%}')dnl
1600+
dnl $1 $2 $3 $4 $5 $6
1601+
VREPLICATE_REG(B, 8, B, D, iRegIorL2I, as_Register)
1602+
VREPLICATE_REG(B, 16, B, X, iRegIorL2I, as_Register)
1603+
VREPLICATE_IMM(B, 8, B, D, immI, ` & 0xff')
1604+
VREPLICATE_IMM(B, 16, B, X, immI, ` & 0xff')
1605+
VREPLICATE_REG(H, 4, S, D, iRegIorL2I, as_Register)
1606+
VREPLICATE_REG(H, 8, S, X, iRegIorL2I, as_Register)
1607+
VREPLICATE_IMM(H, 4, S, D, immI, ` & 0xffff')
1608+
VREPLICATE_IMM(H, 8, S, X, immI, ` & 0xffff')
1609+
VREPLICATE_REG(S, 2, I, D, iRegIorL2I, as_Register)
1610+
VREPLICATE_REG(S, 4, I, X, iRegIorL2I, as_Register)
1611+
VREPLICATE_IMM(S, 2, I, D, immI)
1612+
VREPLICATE_IMM(S, 4, I, X, immI)
1613+
VREPLICATE_REG(D, 2, L, X, iRegL, as_Register)
1614+
VREPLICATE_IMM(D, 2, L, X, immL)
1615+
VREPLICATE_REG(S, 2, F, D, vRegF, as_FloatRegister)
1616+
VREPLICATE_REG(S, 4, F, X, vRegF, as_FloatRegister)
1617+
VREPLICATE_REG(D, 2, D, X, vRegD, as_FloatRegister)
16121618
dnl
16131619

16141620
// ====================REDUCTION ARITHMETIC====================================

src/hotspot/cpu/aarch64/assembler_aarch64.cpp

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
33
* Copyright (c) 2014, 2020 Red Hat Inc. All rights reserved.
44
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
55
*
@@ -355,6 +355,53 @@ bool Assembler::operand_valid_for_logical_immediate(bool is32, uint64_t imm) {
355355
return encode_logical_immediate(is32, imm) != 0xffffffff;
356356
}
357357

358+
// Check immediate encoding for movi.
359+
// Return the shift amount which can be {0, 8, 16, 24} for B/H/S types. As the D type
360+
// movi does not have shift variant, in this case the return value is the immediate
361+
// after encoding.
362+
// Return -1 if the input imm64 can not be encoded.
363+
int Assembler::operand_valid_for_movi_immediate(uint64_t imm64, SIMD_Arrangement T) {
364+
if (T == T1D || T == T2D) {
365+
// To encode into movi, the 64-bit imm must be in the form of
366+
// 'aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh'
367+
// and encoded in "a:b:c:d:e:f:g:h".
368+
uint64_t tmp = imm64;
369+
uint64_t one_byte = 0;
370+
for (int i = 0; i < 8; i++) {
371+
one_byte = tmp & 0xffULL;
372+
if (one_byte != 0xffULL && one_byte != 0) {
373+
return -1; // can not be encoded
374+
}
375+
tmp = tmp >> 8;
376+
}
377+
378+
imm64 &= 0x0101010101010101ULL;
379+
imm64 |= (imm64 >> 7);
380+
imm64 |= (imm64 >> 14);
381+
imm64 |= (imm64 >> 28);
382+
383+
return imm64 & 0xff;
384+
}
385+
386+
uint32_t imm32 = imm64 & 0xffffffffULL;
387+
if (T == T8B || T == T16B) { // 8-bit variant
388+
if (0 == (imm32 & ~0xff)) return 0;
389+
} else if(T == T4H || T == T8H) { // 16-bit variant
390+
if (0 == (imm32 & ~0xff)) return 0;
391+
if (0 == (imm32 & ~0xff00)) return 8;
392+
} else if (T == T2S || T == T4S) { // 32-bit variant
393+
if (0 == (imm32 & ~0xff)) return 0;
394+
if (0 == (imm32 & ~0xff00)) return 8;
395+
if (0 == (imm32 & ~0xff0000)) return 16;
396+
if (0 == (imm32 & ~0xff000000)) return 24;
397+
} else {
398+
assert(false, "unsupported");
399+
ShouldNotReachHere();
400+
}
401+
402+
return -1;
403+
}
404+
358405
bool Assembler::operand_valid_for_sve_logical_immediate(unsigned elembits, uint64_t imm) {
359406
return encode_sve_logical_immediate(elembits, imm) != 0xffffffff;
360407
}

src/hotspot/cpu/aarch64/assembler_aarch64.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3760,6 +3760,7 @@ void sve_cmp(Condition cond, PRegister Pd, SIMD_RegVariant T,
37603760
static bool operand_valid_for_add_sub_immediate(int64_t imm);
37613761
static bool operand_valid_for_sve_add_sub_immediate(int64_t imm);
37623762
static bool operand_valid_for_float_immediate(double imm);
3763+
static int operand_valid_for_movi_immediate(uint64_t imm64, SIMD_Arrangement T);
37633764

37643765
void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
37653766
void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

Lines changed: 35 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1347,48 +1347,43 @@ void MacroAssembler::movptr(Register r, uintptr_t imm64) {
13471347
}
13481348

13491349
// Macro to mov replicated immediate to vector register.
1350-
// Vd will get the following values for different arrangements in T
1351-
// imm32 == hex 000000gh T8B: Vd = ghghghghghghghgh
1352-
// imm32 == hex 000000gh T16B: Vd = ghghghghghghghghghghghghghghghgh
1353-
// imm32 == hex 0000efgh T4H: Vd = efghefghefghefgh
1354-
// imm32 == hex 0000efgh T8H: Vd = efghefghefghefghefghefghefghefgh
1355-
// imm32 == hex abcdefgh T2S: Vd = abcdefghabcdefgh
1356-
// imm32 == hex abcdefgh T4S: Vd = abcdefghabcdefghabcdefghabcdefgh
1357-
// T1D/T2D: invalid
1358-
void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, uint32_t imm32) {
1359-
assert(T != T1D && T != T2D, "invalid arrangement");
1360-
if (T == T8B || T == T16B) {
1361-
assert((imm32 & ~0xff) == 0, "extraneous bits in unsigned imm32 (T8B/T16B)");
1362-
movi(Vd, T, imm32 & 0xff, 0);
1350+
// imm64: only the lower 8/16/32 bits are considered for B/H/S type. That is,
1351+
// the upper 56/48/32 bits must be zeros for B/H/S type.
1352+
// Vd will get the following values for different arrangements in T
1353+
// imm64 == hex 000000gh T8B: Vd = ghghghghghghghgh
1354+
// imm64 == hex 000000gh T16B: Vd = ghghghghghghghghghghghghghghghgh
1355+
// imm64 == hex 0000efgh T4H: Vd = efghefghefghefgh
1356+
// imm64 == hex 0000efgh T8H: Vd = efghefghefghefghefghefghefghefgh
1357+
// imm64 == hex abcdefgh T2S: Vd = abcdefghabcdefgh
1358+
// imm64 == hex abcdefgh T4S: Vd = abcdefghabcdefghabcdefghabcdefgh
1359+
// imm64 == hex abcdefgh T1D: Vd = 00000000abcdefgh
1360+
// imm64 == hex abcdefgh T2D: Vd = 00000000abcdefgh00000000abcdefgh
1361+
// Clobbers rscratch1
1362+
void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, uint64_t imm64) {
1363+
assert(T != T1Q, "unsupported");
1364+
if (T == T1D || T == T2D) {
1365+
int imm = operand_valid_for_movi_immediate(imm64, T);
1366+
if (-1 != imm) {
1367+
movi(Vd, T, imm);
1368+
} else {
1369+
mov(rscratch1, imm64);
1370+
dup(Vd, T, rscratch1);
1371+
}
13631372
return;
13641373
}
1365-
uint32_t nimm32 = ~imm32;
1366-
if (T == T4H || T == T8H) {
1367-
assert((imm32 & ~0xffff) == 0, "extraneous bits in unsigned imm32 (T4H/T8H)");
1368-
imm32 &= 0xffff;
1369-
nimm32 &= 0xffff;
1370-
}
1371-
uint32_t x = imm32;
1372-
int movi_cnt = 0;
1373-
int movn_cnt = 0;
1374-
while (x) { if (x & 0xff) movi_cnt++; x >>= 8; }
1375-
x = nimm32;
1376-
while (x) { if (x & 0xff) movn_cnt++; x >>= 8; }
1377-
if (movn_cnt < movi_cnt) imm32 = nimm32;
1378-
unsigned lsl = 0;
1379-
while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
1380-
if (movn_cnt < movi_cnt)
1381-
mvni(Vd, T, imm32 & 0xff, lsl);
1382-
else
1383-
movi(Vd, T, imm32 & 0xff, lsl);
1384-
imm32 >>= 8; lsl += 8;
1385-
while (imm32) {
1386-
while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
1387-
if (movn_cnt < movi_cnt)
1388-
bici(Vd, T, imm32 & 0xff, lsl);
1389-
else
1390-
orri(Vd, T, imm32 & 0xff, lsl);
1391-
lsl += 8; imm32 >>= 8;
1374+
1375+
#ifdef ASSERT
1376+
if (T == T8B || T == T16B) assert((imm64 & ~0xff) == 0, "extraneous bits (T8B/T16B)");
1377+
if (T == T4H || T == T8H) assert((imm64 & ~0xffff) == 0, "extraneous bits (T4H/T8H)");
1378+
if (T == T2S || T == T4S) assert((imm64 & ~0xffffffff) == 0, "extraneous bits (T2S/T4S)");
1379+
#endif
1380+
int shift = operand_valid_for_movi_immediate(imm64, T);
1381+
uint32_t imm32 = imm64 & 0xffffffffULL;
1382+
if (shift >= 0) {
1383+
movi(Vd, T, (imm32 >> shift) & 0xff, shift);
1384+
} else {
1385+
movw(rscratch1, imm32);
1386+
dup(Vd, T, rscratch1);
13921387
}
13931388
}
13941389

src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,7 @@ class MacroAssembler: public Assembler {
508508

509509
void movptr(Register r, uintptr_t imm64);
510510

511-
void mov(FloatRegister Vd, SIMD_Arrangement T, uint32_t imm32);
511+
void mov(FloatRegister Vd, SIMD_Arrangement T, uint64_t imm64);
512512

513513
void mov(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {
514514
orr(Vd, T, Vn, Vn);

0 commit comments

Comments (0)