@@ -3,7 +3,7 @@ use rustc_abi::{BackendRepr, Endian};
 use rustc_apfloat::ieee::{Double, Half, Quad, Single};
 use rustc_apfloat::{Float, Round};
 use rustc_middle::mir::interpret::{InterpErrorKind, Pointer, UndefinedBehaviorInfo};
-use rustc_middle::ty::{FloatTy, SimdAlign};
+use rustc_middle::ty::{FloatTy, ScalarInt, SimdAlign};
 use rustc_middle::{bug, err_ub_format, mir, span_bug, throw_unsup_format, ty};
 use rustc_span::{Symbol, sym};
 use tracing::trace;
@@ -744,6 +744,58 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
                     self.write_scalar(val, &dest)?;
                 }
             }
+            sym::simd_funnel_shl | sym::simd_funnel_shr => {
+                let (left, _) = self.project_to_simd(&args[0])?;
+                let (right, _) = self.project_to_simd(&args[1])?;
+                let (shift, _) = self.project_to_simd(&args[2])?;
+                let (dest, _) = self.project_to_simd(&dest)?;
+
+                let (len, elem_ty) = args[0].layout.ty.simd_size_and_type(*self.tcx);
+                let (elem_size, _signed) = elem_ty.int_size_and_signed(*self.tcx);
+                let elem_size_bits = u128::from(elem_size.bits());
+
+                let is_left = intrinsic_name == sym::simd_funnel_shl;
+
+                for i in 0..len {
+                    let left =
+                        self.read_scalar(&self.project_index(&left, i)?)?.to_bits(elem_size)?;
+                    let right =
+                        self.read_scalar(&self.project_index(&right, i)?)?.to_bits(elem_size)?;
+                    let shift_bits =
+                        self.read_scalar(&self.project_index(&shift, i)?)?.to_bits(elem_size)?;
+
+                    if shift_bits >= elem_size_bits {
+                        throw_ub_format!(
+                            "overflowing shift by {shift_bits} in `{intrinsic_name}` in lane {i}"
+                        );
+                    }
+                    let inv_shift_bits = u32::try_from(elem_size_bits - shift_bits).unwrap();
+
+                    // A funnel shift left by S can be implemented as `(x << S) | y.unbounded_shr(SIZE - S)`.
+                    // The `unbounded_shr` is needed because otherwise if `S = 0`, it would be `x | y`
+                    // when it should be `x`.
+                    //
+                    // This selects the least-significant `SIZE - S` bits of `x`, followed by the `S` most
+                    // significant bits of `y`. As `left` and `right` both occupy the lower `SIZE` bits,
+                    // we can treat the lower `SIZE` bits as an integer of the right width and use
+                    // the same implementation, but on a zero-extended `x` and `y`. This works because
+                    // `x << S` just pushes the `SIZE - S` MSBs out, and `y >> (SIZE - S)` shifts in
+                    // zeros, as it is zero-extended. To the lower `SIZE` bits, this looks just like a
+                    // funnel shift left.
+                    //
+                    // Note that the `unbounded_sh{l,r}`s are needed only in case we are using this on
+                    // `u128xN` and `inv_shift_bits == 128`.
+                    let result_bits = if is_left {
+                        (left << shift_bits) | right.unbounded_shr(inv_shift_bits)
+                    } else {
+                        left.unbounded_shl(inv_shift_bits) | (right >> shift_bits)
+                    };
+                    let (result, _overflow) = ScalarInt::truncate_from_uint(result_bits, elem_size);
+
+                    let dest = self.project_index(&dest, i)?;
+                    self.write_scalar(result, &dest)?;
+                }
+            }
 
             // Unsupported intrinsic: skip the return_to_block below.
             _ => return interp_ok(false),
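
To illustrate the zero-extension trick described in the comment above, here is a minimal standalone sketch (not part of the diff) specialized to `u8` lanes and checked against a wide-integer reference. The helper name `fshl_u8_via_u128` and the test values are made up for illustration; `u128::unbounded_shr` is the standard-library method the new arm also relies on (stable since Rust 1.87).

```rust
// Sketch: funnel shift left on a `u8` lane, computed on zero-extended `u128`
// values, mirroring how the interpreter operates on `to_bits` results.
fn fshl_u8_via_u128(x: u8, y: u8, s: u32) -> u8 {
    const ELEM_SIZE_BITS: u32 = 8;
    // The intrinsic is UB for shift amounts >= the lane width; the interpreter
    // reports that with `throw_ub_format!`, this sketch simply asserts.
    assert!(s < ELEM_SIZE_BITS);

    let left = u128::from(x); // zero-extended lane
    let right = u128::from(y);
    let inv_shift_bits = ELEM_SIZE_BITS - s;

    // `(x << S) | y.unbounded_shr(SIZE - S)`: for `S = 0` the shift by `SIZE`
    // discards `y` entirely, so the result is just `x`.
    let wide = (left << s) | right.unbounded_shr(inv_shift_bits);
    wide as u8 // truncate back to the lane width
}

fn main() {
    for (x, y, s) in [(0b1011_0011u8, 0b0101_1100u8, 3), (0xAB, 0xCD, 0), (0xFF, 0x01, 7)] {
        // Reference: shift the 16-bit concatenation `x:y` left and keep the high byte.
        let concat = (u16::from(x) << 8) | u16::from(y);
        let reference = ((concat << s) >> 8) as u8;
        assert_eq!(fshl_u8_via_u128(x, y, s), reference);
    }
    println!("funnel-shift-left sketch agrees with the wide-integer reference");
}
```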