Skip to content

Commit a179590

Browse files
Fei Gao authored and Pengfei Li committed
8283091: Support type conversion between different data sizes in SLP
Reviewed-by: kvn, sviswanathan
1 parent f7ba3b7 commit a179590

23 files changed

+1366
-59
lines changed

src/hotspot/cpu/aarch64/aarch64.ad

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2432,6 +2432,19 @@ const bool Matcher::match_rule_supported(int opcode) {
24322432
return ret_value; // Per default match rules are supported.
24332433
}
24342434

2435+
// Reports whether auto-vectorization (SuperWord) may use vector node 'opcode'
// with 'vlen' elements of element type 'bt' on this platform.
// Returns false for VectorCastD2X producing T_INT and VectorCastL2X producing
// T_FLOAT when UseSVE == 0; every other query is answered by
// match_rule_supported_vector().
const bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) {
2436+
if (UseSVE == 0) {
2437+
// ConvD2I and ConvL2F are not profitable to be vectorized on NEON, because no direct
2438+
// NEON instructions support them. But the match rule support for them is profitable for
2439+
// Vector API intrinsics.
2440+
if ((opcode == Op_VectorCastD2X && bt == T_INT) ||
2441+
(opcode == Op_VectorCastL2X && bt == T_FLOAT)) {
2442+
return false;
2443+
}
2444+
}
2445+
// Not excluded above: defer to the generic vector match-rule check.
return match_rule_supported_vector(opcode, vlen, bt);
2446+
}
2447+
24352448
// Identify extra cases that we might want to provide match rules for vector nodes and
24362449
// other intrinsics guarded with vector length (vlen) and element type (bt).
24372450
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {

src/hotspot/cpu/arm/arm.ad

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -981,6 +981,10 @@ const bool Matcher::match_rule_supported(int opcode) {
981981
return true; // Per default match rules are supported.
982982
}
983983

984+
// Auto-vectorization (SuperWord) support query for vector node 'opcode' with
// 'vlen' elements of type 'bt'. No extra exclusions are applied here: the
// result is exactly what match_rule_supported_vector() reports.
const bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) {
985+
return match_rule_supported_vector(opcode, vlen, bt);
986+
}
987+
984988
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
985989

986990
// TODO

src/hotspot/cpu/ppc/ppc.ad

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2165,6 +2165,10 @@ const bool Matcher::match_rule_supported(int opcode) {
21652165
return true; // Per default match rules are supported.
21662166
}
21672167

2168+
// Auto-vectorization (SuperWord) support query for vector node 'opcode' with
// 'vlen' elements of type 'bt'. No extra exclusions are applied here: the
// result is exactly what match_rule_supported_vector() reports.
const bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) {
2169+
return match_rule_supported_vector(opcode, vlen, bt);
2170+
}
2171+
21682172
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
21692173
if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
21702174
return false;

src/hotspot/cpu/riscv/riscv.ad

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1815,6 +1815,10 @@ const bool Matcher::match_rule_supported(int opcode) {
18151815
return true; // Per default match rules are supported.
18161816
}
18171817

1818+
// Auto-vectorization (SuperWord) support query for vector node 'opcode' with
// 'vlen' elements of type 'bt'. No extra exclusions are applied here: the
// result is exactly what match_rule_supported_vector() reports.
const bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) {
1819+
return match_rule_supported_vector(opcode, vlen, bt);
1820+
}
1821+
18181822
// Identify extra cases that we might want to provide match rules for vector nodes and
18191823
// other intrinsics guarded with vector length (vlen) and element type (bt).
18201824
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {

src/hotspot/cpu/s390/s390.ad

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1505,6 +1505,10 @@ const bool Matcher::match_rule_supported(int opcode) {
15051505
return true; // Per default match rules are supported.
15061506
}
15071507

1508+
// Auto-vectorization (SuperWord) support query for vector node 'opcode' with
// 'vlen' elements of type 'bt'. No extra exclusions are applied here: the
// result is exactly what match_rule_supported_vector() reports.
const bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) {
1509+
return match_rule_supported_vector(opcode, vlen, bt);
1510+
}
1511+
15081512
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
15091513
if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
15101514
return false;

src/hotspot/cpu/x86/x86.ad

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1693,6 +1693,10 @@ static inline bool is_pop_count_instr_target(BasicType bt) {
16931693
(is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
16941694
}
16951695

1696+
// Auto-vectorization (SuperWord) support query for vector node 'opcode' with
// 'vlen' elements of type 'bt'. No extra exclusions are applied here: the
// result is exactly what match_rule_supported_vector() reports.
const bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) {
1697+
return match_rule_supported_vector(opcode, vlen, bt);
1698+
}
1699+
16961700
// Identify extra cases that we might want to provide match rules for vector nodes and
16971701
// other intrinsics guarded with vector length (vlen) and element type (bt).
16981702
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {

src/hotspot/share/opto/matcher.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -325,6 +325,10 @@ class Matcher : public PhaseTransform {
325325
// should generate this one.
326326
static const bool match_rule_supported(int opcode);
327327

328+
// Identify extra cases that we might want to vectorize automatically,
329+
// and exclude cases which are not profitable to auto-vectorize.
330+
static const bool match_rule_supported_superword(int opcode, int vlen, BasicType bt);
331+
328332
// identify extra cases that we might want to provide match rules for
329333
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
330334
static const bool match_rule_supported_vector(int opcode, int vlen, BasicType bt);

src/hotspot/share/opto/superword.cpp

Lines changed: 110 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -374,8 +374,10 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
374374
break;
375375
}
376376

377-
// Map the maximal common vector
378-
if (VectorNode::implemented(n->Opcode(), cur_max_vector, bt)) {
377+
// Map the maximal common vector except conversion nodes, because we can't get
378+
// the precise basic type for conversion nodes in the stage of early analysis.
379+
if (!VectorNode::is_convert_opcode(n->Opcode()) &&
380+
VectorNode::implemented(n->Opcode(), cur_max_vector, bt)) {
379381
if (cur_max_vector < max_vector && !flag_small_bt) {
380382
max_vector = cur_max_vector;
381383
} else if (cur_max_vector > max_vector && UseSubwordForMaxVector) {
@@ -1005,6 +1007,12 @@ int SuperWord::get_vw_bytes_special(MemNode* s) {
10051007
}
10061008
}
10071009

1010+
// Check for special case where there is a type conversion between different data sizes.
1011+
int vectsize = max_vector_size_in_def_use_chain(s);
1012+
if (vectsize < max_vector_size(btype)) {
1013+
vw = MIN2(vectsize * type2aelembytes(btype), vw);
1014+
}
1015+
10081016
return vw;
10091017
}
10101018

@@ -1193,7 +1201,9 @@ bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
11931201
BasicType bt2 = velt_basic_type(s2);
11941202
if(!is_java_primitive(bt1) || !is_java_primitive(bt2))
11951203
return false;
1196-
if (Matcher::max_vector_size(bt1) < 2) {
1204+
BasicType longer_bt = longer_type_for_conversion(s1);
1205+
if (max_vector_size(bt1) < 2 ||
1206+
(longer_bt != T_ILLEGAL && max_vector_size(longer_bt) < 2)) {
11971207
return false; // No vectors for this type
11981208
}
11991209

@@ -1436,6 +1446,16 @@ void SuperWord::extend_packlist() {
14361446
}
14371447
}
14381448

1449+
//------------------------------adjust_alignment_for_type_conversion---------------------------------
1450+
// Adjust the target alignment if a conversion between different data sizes exists in def-use nodes.
// If either s or t is such a conversion (longer_type_for_conversion() != T_ILLEGAL),
// 'align' is rescaled by data_size(t) / data_size(s); otherwise it is returned unchanged.
// At the visible call sites 'align' is alignment(s) and t is an input or a use of s.
1451+
int SuperWord::adjust_alignment_for_type_conversion(Node* s, Node* t, int align) {
1452+
if (longer_type_for_conversion(s) != T_ILLEGAL ||
1453+
longer_type_for_conversion(t) != T_ILLEGAL) {
1454+
// Integer arithmetic: divides by data_size(s) first, then multiplies by data_size(t).
// NOTE(review): presumably this maps a byte offset in s's element size to the same
// element position in t's element size - confirm against data_size()/alignment().
align = align / data_size(s) * data_size(t);
1455+
}
1456+
return align;
1457+
}
1458+
14391459
//------------------------------follow_use_defs---------------------------
14401460
// Extend the packset by visiting operand definitions of nodes in pack p
14411461
bool SuperWord::follow_use_defs(Node_List* p) {
@@ -1447,16 +1467,17 @@ bool SuperWord::follow_use_defs(Node_List* p) {
14471467

14481468
if (s1->is_Load()) return false;
14491469

1450-
int align = alignment(s1);
1451-
NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: s1 %d, align %d", s1->_idx, align);)
1470+
NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: s1 %d, align %d", s1->_idx, alignment(s1));)
14521471
bool changed = false;
14531472
int start = s1->is_Store() ? MemNode::ValueIn : 1;
14541473
int end = s1->is_Store() ? MemNode::ValueIn+1 : s1->req();
14551474
for (int j = start; j < end; j++) {
1475+
int align = alignment(s1);
14561476
Node* t1 = s1->in(j);
14571477
Node* t2 = s2->in(j);
14581478
if (!in_bb(t1) || !in_bb(t2))
14591479
continue;
1480+
align = adjust_alignment_for_type_conversion(s1, t1, align);
14601481
if (stmts_can_pack(t1, t2, align)) {
14611482
if (est_savings(t1, t2) >= 0) {
14621483
Node_List* pair = new Node_List();
@@ -1500,12 +1521,15 @@ bool SuperWord::follow_def_uses(Node_List* p) {
15001521
if (t2->Opcode() == Op_AddI && t2 == _lp->as_CountedLoop()->incr()) continue; // don't mess with the iv
15011522
if (!opnd_positions_match(s1, t1, s2, t2))
15021523
continue;
1503-
if (stmts_can_pack(t1, t2, align)) {
1524+
int adjusted_align = alignment(s1);
1525+
adjusted_align = adjust_alignment_for_type_conversion(s1, t1, adjusted_align);
1526+
if (stmts_can_pack(t1, t2, adjusted_align)) {
15041527
int my_savings = est_savings(t1, t2);
15051528
if (my_savings > savings) {
15061529
savings = my_savings;
15071530
u1 = t1;
15081531
u2 = t2;
1532+
align = adjusted_align;
15091533
}
15101534
}
15111535
}
@@ -1698,8 +1722,7 @@ void SuperWord::combine_packs() {
16981722
for (int i = 0; i < _packset.length(); i++) {
16991723
Node_List* p1 = _packset.at(i);
17001724
if (p1 != NULL) {
1701-
BasicType bt = velt_basic_type(p1->at(0));
1702-
uint max_vlen = max_vector_size(bt); // Max elements in vector
1725+
uint max_vlen = max_vector_size_in_def_use_chain(p1->at(0)); // Max elements in vector
17031726
assert(is_power_of_2(max_vlen), "sanity");
17041727
uint psize = p1->size();
17051728
if (!is_power_of_2(psize)) {
@@ -2022,6 +2045,8 @@ bool SuperWord::implemented(Node_List* p) {
20222045
} else {
20232046
retValue = ReductionNode::implemented(opc, size, arith_type->basic_type());
20242047
}
2048+
} else if (VectorNode::is_convert_opcode(opc)) {
2049+
retValue = VectorCastNode::implemented(opc, size, velt_basic_type(p0->in(1)), velt_basic_type(p0));
20252050
} else {
20262051
// Vector unsigned right shift for signed subword types behaves differently
20272052
// from Java Spec. But when the shift amount is a constant not greater than
@@ -2616,12 +2641,11 @@ bool SuperWord::output() {
26162641
Node* in = vector_opd(p, 1);
26172642
vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n));
26182643
vlen_in_bytes = vn->as_Vector()->length_in_bytes();
2619-
} else if (opc == Op_ConvI2F || opc == Op_ConvL2D ||
2620-
opc == Op_ConvF2I || opc == Op_ConvD2L) {
2644+
} else if (VectorNode::is_convert_opcode(opc)) {
26212645
assert(n->req() == 2, "only one input expected");
26222646
BasicType bt = velt_basic_type(n);
2623-
int vopc = VectorNode::opcode(opc, bt);
26242647
Node* in = vector_opd(p, 1);
2648+
int vopc = VectorCastNode::opcode(in->bottom_type()->is_vect()->element_basic_type());
26252649
vn = VectorCastNode::make(vopc, in, bt, vlen);
26262650
vlen_in_bytes = vn->as_Vector()->length_in_bytes();
26272651
} else if (is_cmov_pack(p)) {
@@ -3134,9 +3158,26 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) {
31343158
return true;
31353159
}
31363160

3137-
31383161
if (u_pk->size() != d_pk->size())
31393162
return false;
3163+
3164+
if (longer_type_for_conversion(use) != T_ILLEGAL) {
3165+
// A type conversion consumes elements of one size and produces elements of
3166+
// another size - hence the special checks on alignment and size.
3167+
for (uint i = 0; i < u_pk->size(); i++) {
3168+
Node* ui = u_pk->at(i);
3169+
Node* di = d_pk->at(i);
3170+
if (ui->in(u_idx) != di) {
3171+
return false;
3172+
}
3173+
if (alignment(ui) / type2aelembytes(velt_basic_type(ui)) !=
3174+
alignment(di) / type2aelembytes(velt_basic_type(di))) {
3175+
return false;
3176+
}
3177+
}
3178+
return true;
3179+
}
3180+
31403181
for (uint i = 0; i < u_pk->size(); i++) {
31413182
Node* ui = u_pk->at(i);
31423183
Node* di = d_pk->at(i);
@@ -3369,6 +3410,63 @@ void SuperWord::compute_max_depth() {
33693410
}
33703411
}
33713412

3413+
// Returns the wider of the two element types involved when n is a conversion
// between types of different sizes, and T_ILLEGAL for every other node
// (including ConvI2F/ConvF2I whose integral side is not byte or short).
BasicType SuperWord::longer_type_for_conversion(Node* n) {
3414+
int opcode = n->Opcode();
3415+
switch (opcode) {
3416+
// Conversions with a double on one side: double is the longer type.
case Op_ConvD2I:
3417+
case Op_ConvI2D:
3418+
case Op_ConvF2D:
3419+
case Op_ConvD2F: return T_DOUBLE;
3420+
// Conversions with a long on one side: long is the longer type.
case Op_ConvF2L:
3421+
case Op_ConvL2F:
3422+
case Op_ConvL2I:
3423+
case Op_ConvI2L: return T_LONG;
3424+
// ConvI2F only counts as a size-changing conversion when the vector element
// type of its input is byte or short; float is then the longer type.
case Op_ConvI2F: {
3425+
BasicType src_t = velt_basic_type(n->in(1));
3426+
if (src_t == T_BYTE || src_t == T_SHORT) {
3427+
return T_FLOAT;
3428+
}
3429+
return T_ILLEGAL;
3430+
}
3431+
// ConvF2I only counts as a size-changing conversion when the vector element
// type of the node itself (its result) is byte or short.
case Op_ConvF2I: {
3432+
BasicType dst_t = velt_basic_type(n);
3433+
if (dst_t == T_BYTE || dst_t == T_SHORT) {
3434+
return T_FLOAT;
3435+
}
3436+
return T_ILLEGAL;
3437+
}
3438+
}
3439+
// Any opcode not listed above is not treated as a size-changing conversion.
return T_ILLEGAL;
3440+
}
3441+
3442+
// Computes the maximum vector size (as returned by max_vector_size()) to use for n,
// taking into account size-changing conversions among n's in-block inputs and uses.
// Starts from n's own vector element type; if an adjacent conversion reports a longer
// type, the limit for that type is used instead, unless that limit is below 2, in
// which case the limit for n's own element type is returned.
int SuperWord::max_vector_size_in_def_use_chain(Node* n) {
3443+
BasicType bt = velt_basic_type(n);
3444+
BasicType vt = bt;
3445+
3446+
// find the longest type among def nodes.
// NOTE(review): vt is overwritten by every conversion found, so the last one
// visited wins; no size comparison is made between candidates - confirm that
// at most one distinct longer type can occur here.
3447+
uint start, end;
3448+
VectorNode::vector_operands(n, &start, &end);
3449+
for (uint i = start; i < end; ++i) {
3450+
Node* input = n->in(i);
3451+
if (!in_bb(input)) continue;
3452+
BasicType newt = longer_type_for_conversion(input);
3453+
vt = (newt == T_ILLEGAL) ? vt : newt;
3454+
}
3455+
3456+
// find the longest type among use nodes.
// Same "last conversion found wins" update as for the inputs above.
3457+
for (uint i = 0; i < n->outcnt(); ++i) {
3458+
Node* output = n->raw_out(i);
3459+
if (!in_bb(output)) continue;
3460+
BasicType newt = longer_type_for_conversion(output);
3461+
vt = (newt == T_ILLEGAL) ? vt : newt;
3462+
}
3463+
3464+
int max = max_vector_size(vt);
3465+
// If now there is no vectors for the longest type, the nodes with the longest
3466+
// type in the def-use chain are not packed in SuperWord::stmts_can_pack.
3467+
return max < 2 ? max_vector_size(bt) : max;
3468+
}
3469+
33723470
//-------------------------compute_vector_element_type-----------------------
33733471
// Compute necessary vector element type for expressions
33743472
// This propagates backwards a narrower integer type when the

src/hotspot/share/opto/superword.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,7 @@ class SuperWord : public ResourceObj {
518518
int data_size(Node* s);
519519
// Extend packset by following use->def and def->use links from pack members.
520520
void extend_packlist();
521+
int adjust_alignment_for_type_conversion(Node* s, Node* t, int align);
521522
// Extend the packset by visiting operand definitions of nodes in pack p
522523
bool follow_use_defs(Node_List* p);
523524
// Extend the packset by visiting uses of nodes in pack p
@@ -571,6 +572,10 @@ class SuperWord : public ResourceObj {
571572
void bb_insert_after(Node* n, int pos);
572573
// Compute max depth for expressions from beginning of block
573574
void compute_max_depth();
575+
// Return the longer type for type-conversion node and return illegal type for other nodes.
576+
BasicType longer_type_for_conversion(Node* n);
577+
// Find the longest type in def-use chain for packed nodes, and then compute the max vector size.
578+
int max_vector_size_in_def_use_chain(Node* n);
574579
// Compute necessary vector element type for expressions
575580
void compute_vector_element_type();
576581
// Are s1 and s2 in a pack pair and ordered as s1,s2?

0 commit comments

Comments
 (0)