@@ -511,6 +511,37 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) {
511511 return true ;
512512}
513513
514+ // Following routine generates IR corresponding to AbstractShuffle::partiallyWrapIndex method,
515+ // which partially wraps index by modulo VEC_LENGTH and generates a negative index value if original
516+ // index is out of valid index range [0, VEC_LENGTH)
517+ //
518+ // wrapped_index = (VEC_LENGTH - 1) & index
519+ // if (index u>= VEC_LENGTH)
520+ //   wrapped_index -= VEC_LENGTH;
521+ //
522+ // Note: The unsigned comparison treats both negative indices and indices >= VEC_LENGTH as
523+ // out-of-bounds.
524+ Node* LibraryCallKit::partially_wrap_indexes (Node* index_vec, int num_elem, BasicType elem_bt) { // Builds and returns the IR for a partially wrapped copy of index_vec (num_elem lanes of elem_bt).
525+ assert (elem_bt == T_BYTE, " Shuffles use byte array based backing storage." );
526+ const TypeVect* vt = TypeVect::make (elem_bt, num_elem); // vector type of the result: num_elem lanes of elem_bt
527+ const Type* type_bt = Type::get_const_basic_type (elem_bt);
528+
529+ Node* mod_mask = gvn ().makecon (TypeInt::make (num_elem-1 )); // scalar constant num_elem - 1, used as the AND mask below
530+ Node* bcast_mod_mask = gvn ().transform (VectorNode::scalar2vector (mod_mask, num_elem, type_bt)); // same constant replicated into every lane
531+
532+ BoolTest::mask pred = BoolTest::ugt; // unsigned greater-than
533+ ConINode* pred_node = (ConINode*)gvn ().makecon (TypeInt::make (pred)); // the predicate again, as an int constant input of the compare node
534+ Node* lane_cnt = gvn ().makecon (TypeInt::make (num_elem)); // scalar constant num_elem
535+ Node* bcast_lane_cnt = gvn ().transform (VectorNode::scalar2vector (lane_cnt, num_elem, type_bt)); // num_elem replicated into every lane
536+ const TypeVect* vmask_type = TypeVect::makemask (type_bt, num_elem);
537+ Node* mask = gvn ().transform (new VectorMaskCmpNode (pred, bcast_lane_cnt, index_vec, pred_node, vmask_type)); // operands are (lane count, index): presumably set where num_elem u> index, i.e. the index is in range
538+
539+ // Turn out-of-range indices into negative values to match the Java-side implementation.
540+ index_vec = gvn ().transform (VectorNode::make (Op_AndV, index_vec, bcast_mod_mask, vt)); // index & (num_elem - 1)
541+ Node* biased_val = gvn ().transform (VectorNode::make (Op_SubVB, index_vec, bcast_lane_cnt, vt)); // (index & (num_elem - 1)) - num_elem
542+ return gvn ().transform (new VectorBlendNode (biased_val, index_vec, mask)); // per-lane choice between the biased and the wrapped value under mask; NOTE(review): assumes the blend picks its second input where the mask is set - confirm
543+ }
544+
514545// <Sh extends VectorShuffle<E>, E>
515546// Sh ShuffleIota(Class<?> E, Class<?> shuffleClass, Vector.Species<E> s, int length,
516547// int start, int step, int wrap, ShuffleIotaOperation<Sh, E> defaultImpl)
@@ -596,18 +627,9 @@ bool LibraryCallKit::inline_vector_shuffle_iota() {
596627
597628 if (do_wrap) {
598629 // Wrap the indices greater than lane count.
599- res = gvn ().transform (VectorNode::make (Op_AndV, res, bcast_mod, vt));
600- } else {
601- ConINode* pred_node = (ConINode*)gvn ().makecon (TypeInt::make (BoolTest::ugt));
602- Node * lane_cnt = gvn ().makecon (TypeInt::make (num_elem));
603- Node * bcast_lane_cnt = gvn ().transform (VectorNode::scalar2vector (lane_cnt, num_elem, type_bt));
604- const TypeVect* vmask_type = TypeVect::makemask (elem_bt, num_elem);
605- Node* mask = gvn ().transform (new VectorMaskCmpNode (BoolTest::ugt, bcast_lane_cnt, res, pred_node, vmask_type));
606-
607- // Make the indices greater than lane count as -ve values to match the java side implementation.
608630 res = gvn ().transform (VectorNode::make (Op_AndV, res, bcast_mod, vt));
609- Node * biased_val = gvn (). transform ( VectorNode::make (Op_SubVB, res, bcast_lane_cnt, vt));
610- res = gvn (). transform ( new VectorBlendNode (biased_val, res, mask) );
631+ } else {
632+ res = partially_wrap_indexes (res, num_elem, elem_bt );
611633 }
612634
613635 ciKlass* sbox_klass = shuffle_klass->const_oop ()->as_instance ()->java_lang_Class_klass ();
@@ -2286,6 +2308,18 @@ bool LibraryCallKit::inline_vector_convert() {
22862308 return false ;
22872309 }
22882310
2311+
2312+ if (is_vector_shuffle (vbox_klass_to) &&
2313+ (!arch_supports_vector (Op_SubVB, num_elem_to, elem_bt_to, VecMaskNotUsed) ||
2314+ !arch_supports_vector (Op_VectorBlend, num_elem_to, elem_bt_to, VecMaskNotUsed) ||
2315+ !arch_supports_vector (Op_VectorMaskCmp, num_elem_to, elem_bt_to, VecMaskNotUsed) ||
2316+ !arch_supports_vector (Op_AndV, num_elem_to, elem_bt_to, VecMaskNotUsed) ||
2317+ !arch_supports_vector (Op_Replicate, num_elem_to, elem_bt_to, VecMaskNotUsed))) {
2318+ log_if_needed (" ** not supported: arity=1 op=shuffle_index_wrap vlen2=%d etype2=%s" ,
2319+ num_elem_to, type2name (elem_bt_to));
2320+ return false ;
2321+ }
2322+
22892323 // At this point, we know that both input and output vector registers are supported
22902324 // by the architecture. Next check if the casted type is simply to same type - which means
22912325 // that it is actually a resize and not a cast.
@@ -2383,6 +2417,10 @@ bool LibraryCallKit::inline_vector_convert() {
23832417 op = gvn ().transform (new VectorReinterpretNode (op, src_type, dst_type));
23842418 }
23852419
2420+ if (is_vector_shuffle (vbox_klass_to)) {
2421+ op = partially_wrap_indexes (op, num_elem_to, elem_bt_to);
2422+ }
2423+
23862424 const TypeInstPtr* vbox_type_to = TypeInstPtr::make_exact (TypePtr::NotNull, vbox_klass_to);
23872425 Node* vbox = box_vector (op, vbox_type_to, elem_bt_to, num_elem_to);
23882426 set_result (vbox);
0 commit comments